aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/cpusets.txt6
-rw-r--r--Documentation/filesystems/00-INDEX4
-rw-r--r--Documentation/filesystems/relay.txt479
-rw-r--r--Documentation/filesystems/relayfs.txt442
-rw-r--r--Documentation/sysctl/fs.txt20
-rw-r--r--Documentation/sysctl/kernel.txt20
-rw-r--r--Makefile2
-rw-r--r--arch/i386/Kconfig4
-rw-r--r--arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c9
-rw-r--r--drivers/base/node.c2
-rw-r--r--drivers/cdrom/gscd.c2
-rw-r--r--drivers/char/moxa.c8
-rw-r--r--drivers/char/tty_io.c808
-rw-r--r--drivers/char/tty_ioctl.c59
-rw-r--r--drivers/char/vt_ioctl.c2
-rw-r--r--drivers/ieee1394/ohci1394.c4
-rw-r--r--drivers/md/dm-raid1.c4
-rw-r--r--drivers/md/md.c13
-rw-r--r--drivers/md/raid1.c7
-rw-r--r--drivers/mtd/nand/ams-delta.c10
-rw-r--r--drivers/mtd/nand/nand_base.c6
-rw-r--r--drivers/rtc/rtc-s3c.c124
-rw-r--r--drivers/scsi/ide-scsi.c2
-rw-r--r--drivers/video/imacfb.c4
-rw-r--r--drivers/video/matrox/g450_pll.c8
-rw-r--r--fs/block_dev.c114
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext3/balloc.c6
-rw-r--r--fs/jbd/commit.c6
-rw-r--r--fs/jbd/journal.c92
-rw-r--r--fs/jbd/transaction.c9
-rw-r--r--fs/minix/inode.c13
-rw-r--r--fs/proc/proc_misc.c2
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/ufs/inode.c35
-rw-r--r--fs/ufs/truncate.c77
-rw-r--r--include/asm-arm/arch-s3c2410/regs-rtc.h2
-rw-r--r--include/asm-i386/mmzone.h2
-rw-r--r--include/linux/compat_ioctl.h1
-rw-r--r--include/linux/fs.h3
-rw-r--r--include/linux/jbd.h3
-rw-r--r--include/linux/node.h10
-rw-r--r--include/linux/tty.h1
-rw-r--r--include/linux/vt.h1
-rw-r--r--kernel/cpuset.c35
-rw-r--r--kernel/futex.c2
-rw-r--r--kernel/sched.c4
-rw-r--r--kernel/stop_machine.c1
-rw-r--r--mm/swapfile.c3
-rw-r--r--net/dccp/ccids/ccid3.c149
-rw-r--r--net/dccp/ccids/ccid3.h5
-rw-r--r--net/dccp/ccids/lib/loss_interval.c34
-rw-r--r--net/dccp/ccids/lib/loss_interval.h7
-rw-r--r--net/dccp/ccids/lib/packet_history.c141
-rw-r--r--net/dccp/ccids/lib/packet_history.h11
57 files changed, 1855 insertions, 973 deletions
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 159e2a0c3e8..76b44290c15 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -217,6 +217,12 @@ exclusive cpuset. Also, the use of a Linux virtual file system (vfs)
to represent the cpuset hierarchy provides for a familiar permission
and name space for cpusets, with a minimum of additional kernel code.
+The cpus file in the root (top_cpuset) cpuset is read-only.
+It automatically tracks the value of cpu_online_map, using a CPU
+hotplug notifier. If and when memory nodes can be hotplugged,
+we expect to make the mems file in the root cpuset read-only
+as well, and have it track the value of node_online_map.
+
1.4 What are exclusive cpusets ?
--------------------------------
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 66fdc0744fe..16dec61d767 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -62,8 +62,8 @@ ramfs-rootfs-initramfs.txt
- info on the 'in memory' filesystems ramfs, rootfs and initramfs.
reiser4.txt
- info on the Reiser4 filesystem based on dancing tree algorithms.
-relayfs.txt
- - info on relayfs, for efficient streaming from kernel to user space.
+relay.txt
+ - info on relay, for efficient streaming from kernel to user space.
romfs.txt
- description of the ROMFS filesystem.
smbfs.txt
diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt
new file mode 100644
index 00000000000..d6788dae034
--- /dev/null
+++ b/Documentation/filesystems/relay.txt
@@ -0,0 +1,479 @@
+relay interface (formerly relayfs)
+==================================
+
+The relay interface provides a means for kernel applications to
+efficiently log and transfer large quantities of data from the kernel
+to userspace via user-defined 'relay channels'.
+
+A 'relay channel' is a kernel->user data relay mechanism implemented
+as a set of per-cpu kernel buffers ('channel buffers'), each
+represented as a regular file ('relay file') in user space. Kernel
+clients write into the channel buffers using efficient write
+functions; these automatically log into the current cpu's channel
+buffer. User space applications mmap() or read() from the relay files
+and retrieve the data as it becomes available. The relay files
+themselves are files created in a host filesystem, e.g. debugfs, and
+are associated with the channel buffers using the API described below.
+
+The format of the data logged into the channel buffers is completely
+up to the kernel client; the relay interface does however provide
+hooks which allow kernel clients to impose some structure on the
+buffer data. The relay interface doesn't implement any form of data
+filtering - this also is left to the kernel client. The purpose is to
+keep things as simple as possible.
+
+This document provides an overview of the relay interface API. The
+details of the function parameters are documented along with the
+functions in the relay interface code - please see that for details.
+
+Semantics
+=========
+
+Each relay channel has one buffer per CPU, each buffer has one or more
+sub-buffers. Messages are written to the first sub-buffer until it is
+too full to contain a new message, in which case it it is written to
+the next (if available). Messages are never split across sub-buffers.
+At this point, userspace can be notified so it empties the first
+sub-buffer, while the kernel continues writing to the next.
+
+When notified that a sub-buffer is full, the kernel knows how many
+bytes of it are padding i.e. unused space occurring because a complete
+message couldn't fit into a sub-buffer. Userspace can use this
+knowledge to copy only valid data.
+
+After copying it, userspace can notify the kernel that a sub-buffer
+has been consumed.
+
+A relay channel can operate in a mode where it will overwrite data not
+yet collected by userspace, and not wait for it to be consumed.
+
+The relay channel itself does not provide for communication of such
+data between userspace and kernel, allowing the kernel side to remain
+simple and not impose a single interface on userspace. It does
+provide a set of examples and a separate helper though, described
+below.
+
+The read() interface both removes padding and internally consumes the
+read sub-buffers; thus in cases where read(2) is being used to drain
+the channel buffers, special-purpose communication between kernel and
+user isn't necessary for basic operation.
+
+One of the major goals of the relay interface is to provide a low
+overhead mechanism for conveying kernel data to userspace. While the
+read() interface is easy to use, it's not as efficient as the mmap()
+approach; the example code attempts to make the tradeoff between the
+two approaches as small as possible.
+
+klog and relay-apps example code
+================================
+
+The relay interface itself is ready to use, but to make things easier,
+a couple simple utility functions and a set of examples are provided.
+
+The relay-apps example tarball, available on the relay sourceforge
+site, contains a set of self-contained examples, each consisting of a
+pair of .c files containing boilerplate code for each of the user and
+kernel sides of a relay application. When combined these two sets of
+boilerplate code provide glue to easily stream data to disk, without
+having to bother with mundane housekeeping chores.
+
+The 'klog debugging functions' patch (klog.patch in the relay-apps
+tarball) provides a couple of high-level logging functions to the
+kernel which allow writing formatted text or raw data to a channel,
+regardless of whether a channel to write into exists or not, or even
+whether the relay interface is compiled into the kernel or not. These
+functions allow you to put unconditional 'trace' statements anywhere
+in the kernel or kernel modules; only when there is a 'klog handler'
+registered will data actually be logged (see the klog and kleak
+examples for details).
+
+It is of course possible to use the relay interface from scratch,
+i.e. without using any of the relay-apps example code or klog, but
+you'll have to implement communication between userspace and kernel,
+allowing both to convey the state of buffers (full, empty, amount of
+padding). The read() interface both removes padding and internally
+consumes the read sub-buffers; thus in cases where read(2) is being
+used to drain the channel buffers, special-purpose communication
+between kernel and user isn't necessary for basic operation. Things
+such as buffer-full conditions would still need to be communicated via
+some channel though.
+
+klog and the relay-apps examples can be found in the relay-apps
+tarball on http://relayfs.sourceforge.net
+
+The relay interface user space API
+==================================
+
+The relay interface implements basic file operations for user space
+access to relay channel buffer data. Here are the file operations
+that are available and some comments regarding their behavior:
+
+open() enables user to open an _existing_ channel buffer.
+
+mmap() results in channel buffer being mapped into the caller's
+ memory space. Note that you can't do a partial mmap - you
+ must map the entire file, which is NRBUF * SUBBUFSIZE.
+
+read() read the contents of a channel buffer. The bytes read are
+ 'consumed' by the reader, i.e. they won't be available
+ again to subsequent reads. If the channel is being used
+ in no-overwrite mode (the default), it can be read at any
+ time even if there's an active kernel writer. If the
+ channel is being used in overwrite mode and there are
+ active channel writers, results may be unpredictable -
+ users should make sure that all logging to the channel has
+ ended before using read() with overwrite mode. Sub-buffer
+ padding is automatically removed and will not be seen by
+ the reader.
+
+sendfile() transfer data from a channel buffer to an output file
+ descriptor. Sub-buffer padding is automatically removed
+ and will not be seen by the reader.
+
+poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are
+ notified when sub-buffer boundaries are crossed.
+
+close() decrements the channel buffer's refcount. When the refcount
+ reaches 0, i.e. when no process or kernel client has the
+ buffer open, the channel buffer is freed.
+
+In order for a user application to make use of relay files, the
+host filesystem must be mounted. For example,
+
+ mount -t debugfs debugfs /debug
+
+NOTE: the host filesystem doesn't need to be mounted for kernel
+ clients to create or use channels - it only needs to be
+ mounted when user space applications need access to the buffer
+ data.
+
+
+The relay interface kernel API
+==============================
+
+Here's a summary of the API the relay interface provides to in-kernel clients:
+
+TBD(curr. line MT:/API/)
+ channel management functions:
+
+ relay_open(base_filename, parent, subbuf_size, n_subbufs,
+ callbacks)
+ relay_close(chan)
+ relay_flush(chan)
+ relay_reset(chan)
+
+ channel management typically called on instigation of userspace:
+
+ relay_subbufs_consumed(chan, cpu, subbufs_consumed)
+
+ write functions:
+
+ relay_write(chan, data, length)
+ __relay_write(chan, data, length)
+ relay_reserve(chan, length)
+
+ callbacks:
+
+ subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
+ buf_mapped(buf, filp)
+ buf_unmapped(buf, filp)
+ create_buf_file(filename, parent, mode, buf, is_global)
+ remove_buf_file(dentry)
+
+ helper functions:
+
+ relay_buf_full(buf)
+ subbuf_start_reserve(buf, length)
+
+
+Creating a channel
+------------------
+
+relay_open() is used to create a channel, along with its per-cpu
+channel buffers. Each channel buffer will have an associated file
+created for it in the host filesystem, which can be and mmapped or
+read from in user space. The files are named basename0...basenameN-1
+where N is the number of online cpus, and by default will be created
+in the root of the filesystem (if the parent param is NULL). If you
+want a directory structure to contain your relay files, you should
+create it using the host filesystem's directory creation function,
+e.g. debugfs_create_dir(), and pass the parent directory to
+relay_open(). Users are responsible for cleaning up any directory
+structure they create, when the channel is closed - again the host
+filesystem's directory removal functions should be used for that,
+e.g. debugfs_remove().
+
+In order for a channel to be created and the host filesystem's files
+associated with its channel buffers, the user must provide definitions
+for two callback functions, create_buf_file() and remove_buf_file().
+create_buf_file() is called once for each per-cpu buffer from
+relay_open() and allows the user to create the file which will be used
+to represent the corresponding channel buffer. The callback should
+return the dentry of the file created to represent the channel buffer.
+remove_buf_file() must also be defined; it's responsible for deleting
+the file(s) created in create_buf_file() and is called during
+relay_close().
+
+Here are some typical definitions for these callbacks, in this case
+using debugfs:
+
+/*
+ * create_buf_file() callback. Creates relay file in debugfs.
+ */
+static struct dentry *create_buf_file_handler(const char *filename,
+ struct dentry *parent,
+ int mode,
+ struct rchan_buf *buf,
+ int *is_global)
+{
+ return debugfs_create_file(filename, mode, parent, buf,
+ &relay_file_operations);
+}
+
+/*
+ * remove_buf_file() callback. Removes relay file from debugfs.
+ */
+static int remove_buf_file_handler(struct dentry *dentry)
+{
+ debugfs_remove(dentry);
+
+ return 0;
+}
+
+/*
+ * relay interface callbacks
+ */
+static struct rchan_callbacks relay_callbacks =
+{
+ .create_buf_file = create_buf_file_handler,
+ .remove_buf_file = remove_buf_file_handler,
+};
+
+And an example relay_open() invocation using them:
+
+ chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks);
+
+If the create_buf_file() callback fails, or isn't defined, channel
+creation and thus relay_open() will fail.
+
+The total size of each per-cpu buffer is calculated by multiplying the
+number of sub-buffers by the sub-buffer size passed into relay_open().
+The idea behind sub-buffers is that they're basically an extension of
+double-buffering to N buffers, and they also allow applications to
+easily implement random-access-on-buffer-boundary schemes, which can
+be important for some high-volume applications. The number and size
+of sub-buffers is completely dependent on the application and even for
+the same application, different conditions will warrant different
+values for these parameters at different times. Typically, the right
+values to use are best decided after some experimentation; in general,
+though, it's safe to assume that having only 1 sub-buffer is a bad
+idea - you're guaranteed to either overwrite data or lose events
+depending on the channel mode being used.
+
+The create_buf_file() implementation can also be defined in such a way
+as to allow the creation of a single 'global' buffer instead of the
+default per-cpu set. This can be useful for applications interested
+mainly in seeing the relative ordering of system-wide events without
+the need to bother with saving explicit timestamps for the purpose of
+merging/sorting per-cpu files in a postprocessing step.
+
+To have relay_open() create a global buffer, the create_buf_file()
+implementation should set the value of the is_global outparam to a
+non-zero value in addition to creating the file that will be used to
+represent the single buffer. In the case of a global buffer,
+create_buf_file() and remove_buf_file() will be called only once. The
+normal channel-writing functions, e.g. relay_write(), can still be
+used - writes from any cpu will transparently end up in the global
+buffer - but since it is a global buffer, callers should make sure
+they use the proper locking for such a buffer, either by wrapping
+writes in a spinlock, or by copying a write function from relay.h and
+creating a local version that internally does the proper locking.
+
+Channel 'modes'
+---------------
+
+relay channels can be used in either of two modes - 'overwrite' or
+'no-overwrite'. The mode is entirely determined by the implementation
+of the subbuf_start() callback, as described below. The default if no
+subbuf_start() callback is defined is 'no-overwrite' mode. If the
+default mode suits your needs, and you plan to use the read()
+interface to retrieve channel data, you can ignore the details of this
+section, as it pertains mainly to mmap() implementations.
+
+In 'overwrite' mode, also known as 'flight recorder' mode, writes
+continuously cycle around the buffer and will never fail, but will
+unconditionally overwrite old data regardless of whether it's actually
+been consumed. In no-overwrite mode, writes will fail, i.e. data will
+be lost, if the number of unconsumed sub-buffers equals the total
+number of sub-buffers in the channel. It should be clear that if
+there is no consumer or if the consumer can't consume sub-buffers fast
+enough, data will be lost in either case; the only difference is
+whether data is lost from the beginning or the end of a buffer.
+
+As explained above, a relay channel is made of up one or more
+per-cpu channel buffers, each implemented as a circular buffer
+subdivided into one or more sub-buffers. Messages are written into
+the current sub-buffer of the channel's current per-cpu buffer via the
+write functions described below. Whenever a message can't fit into
+the current sub-buffer, because there's no room left for it, the
+client is notified via the subbuf_start() callback that a switch to a
+new sub-buffer is about to occur. The client uses this callback to 1)
+initialize the next sub-buffer if appropriate 2) finalize the previous
+sub-buffer if appropriate and 3) return a boolean value indicating
+whether or not to actually move on to the next sub-buffer.
+
+To implement 'no-overwrite' mode, the userspace client would provide
+an implementation of the subbuf_start() callback something like the
+following:
+
+static int subbuf_start(struct rchan_buf *buf,
+ void *subbuf,
+ void *prev_subbuf,
+ unsigned int prev_padding)
+{
+ if (prev_subbuf)
+ *((unsigned *)prev_subbuf) = prev_padding;
+
+ if (relay_buf_full(buf))
+ return 0;
+
+ subbuf_start_reserve(buf, sizeof(unsigned int));
+
+ return 1;
+}
+
+If the current buffer is full, i.e. all sub-buffers remain unconsumed,
+the callback returns 0 to indicate that the buffer switch should not
+occur yet, i.e. until the consumer has had a chance to read the
+current set of ready sub-buffers. For the relay_buf_full() function
+to make sense, the consumer is reponsible for notifying the relay
+interface when sub-buffers have been consumed via
+relay_subbufs_consumed(). Any subsequent attempts to write into the
+buffer will again invoke the subbuf_start() callback with the same
+parameters; only when the consumer has consumed one or more of the
+ready sub-buffers will relay_buf_full() return 0, in which case the
+buffer switch can continue.
+
+The implementation of the subbuf_start() callback for 'overwrite' mode
+would be very similar:
+
+static int subbuf_start(struct rchan_buf *buf,
+ void *subbuf,
+ void *prev_subbuf,
+ unsigned int prev_padding)
+{
+ if (prev_subbuf)
+ *((unsigned *)prev_subbuf) = prev_padding;
+
+ subbuf_start_reserve(buf, sizeof(unsigned int));
+
+ return 1;
+}
+
+In this case, the relay_buf_full() check is meaningless and the
+callback always returns 1, causing the buffer switch to occur
+unconditionally. It's also meaningless for the client to use the
+relay_subbufs_consumed() function in this mode, as it's never
+consulted.
+
+The default subbuf_start() implementation, used if the client doesn't
+define any callbacks, or doesn't define the subbuf_start() callback,
+implements the simplest possible 'no-overwrite' mode, i.e. it does
+nothing but return 0.
+
+Header information can be reserved at the beginning of each sub-buffer
+by calling the subbuf_start_reserve() helper function from within the
+subbuf_start() callback. This reserved area can be used to store
+whatever information the client wants. In the example above, room is
+reserved in each sub-buffer to store the padding count for that
+sub-buffer. This is filled in for the previous sub-buffer in the
+subbuf_start() implementation; the padding value for the previous
+sub-buffer is passed into the subbuf_start() callback along with a
+pointer to the previous sub-buffer, since the padding value isn't
+known until a sub-buffer is filled. The subbuf_start() callback is
+also called for the first sub-buffer when the channel is opened, to
+give the client a chance to reserve space in it. In this case the
+previous sub-buffer pointer passed into the callback will be NULL, so
+the client should check the value of the prev_subbuf pointer before
+writing into the previous sub-buffer.
+
+Writing to a channel
+--------------------
+
+Kernel clients write data into the current cpu's channel buffer using
+relay_write() or __relay_write(). relay_write() is the main logging
+function - it uses local_irqsave() to protect the buffer and should be
+used if you might be logging from interrupt context. If you know
+you'll never be logging from interrupt context, you can use
+__relay_write(), which only disables preemption. These functions
+don't return a value, so you can't determine whether or not they
+failed - the assumption is that you wouldn't want to check a return
+value in the fast logging path anyway, and that they'll always succeed
+unless the buffer is full and no-overwrite mode is being used, in
+which case you can detect a failed write in the subbuf_start()
+callback by calling the relay_buf_full() helper function.
+
+relay_reserve() is used to reserve a slot in a channel buffer which
+can be written to later. This would typically be used in applications
+that need to write directly into a channel buffer without having to
+stage data in a temporary buffer beforehand. Because the actual write
+may not happen immediately after the slot is reserved, applications
+using relay_reserve() can keep a count of the number of bytes actually
+written, either in space reserved in the sub-buffers themselves or as
+a separate array. See the 'reserve' example in the relay-apps tarball
+at http://relayfs.sourceforge.net for an example of how this can be
+done. Because the write is under control of the client and is
+separated from the reserve, relay_reserve() doesn't protect the buffer
+at all - it's up to the client to provide the appropriate
+synchronization when using relay_reserve().
+
+Closing a channel
+-----------------
+
+The client calls relay_close() when it's finished using the channel.
+The channel and its associated buffers are destroyed when there are no
+longer any references to any of the channel buffers. relay_flush()
+forces a sub-buffer switch on all the channel buffers, and can be used
+to finalize and process the last sub-buffers before the channel is
+closed.
+
+Misc
+----
+
+Some applications may want to keep a channel around and re-use it
+rather than open and close a new channel for each use. relay_reset()
+can be used for this purpose - it resets a channel to its initial
+state without reallocating channel buffer memory or destroying
+existing mappings. It should however only be called when it's safe to
+do so, i.e. when the channel isn't currently being written to.
+
+Finally, there are a couple of utility callbacks that can be used for
+different purposes. buf_mapped() is called whenever a channel buffer
+is mmapped from user space and buf_unmapped() is called when it's
+unmapped. The client can use this notification to trigger actions
+within the kernel application, such as enabling/disabling logging to
+the channel.
+
+
+Resources
+=========
+
+For news, example code, mailing list, etc. see the relay interface homepage:
+
+ http://relayfs.sourceforge.net
+
+
+Credits
+=======
+
+The ideas and specs for the relay interface came about as a result of
+discussions on tracing involving the following:
+
+Michel Dagenais <michel.dagenais@polymtl.ca>
+Richard Moore <richardj_moore@uk.ibm.com>
+Bob Wisniewski <bob@watson.ibm.com>
+Karim Yaghmour <karim@opersys.com>
+Tom Zanussi <zanussi@us.ibm.com>
+
+Also thanks to Hubertus Franke for a lot of useful suggestions and bug
+reports.
diff --git a/Documentation/filesystems/relayfs.txt b/Documentation/filesystems/relayfs.txt
deleted file mode 100644
index 5832377b734..00000000000
--- a/Documentation/filesystems/relayfs.txt
+++ /dev/null
@@ -1,442 +0,0 @@
-
-relayfs - a high-speed data relay filesystem
-============================================
-
-relayfs is a filesystem designed to provide an efficient mechanism for
-tools and facilities to relay large and potentially sustained streams
-of data from kernel space to user space.
-
-The main abstraction of relayfs is the 'channel'. A channel consists
-of a set of per-cpu kernel buffers each represented by a file in the
-relayfs filesystem. Kernel clients write into a channel using
-efficient write functions which automatically log to the current cpu's
-channel buffer. User space applications mmap() the per-cpu files and
-retrieve the data as it becomes available.
-
-The format of the data logged into the channel buffers is completely
-up to the relayfs client; relayfs does however provide hooks which
-allow clients to impose some structure on the buffer data. Nor does
-relayfs implement any form of data filtering - this also is left to
-the client. The purpose is to keep relayfs as simple as possible.
-
-This document provides an overview of the relayfs API. The details of
-the function parameters are documented along with the functions in the
-filesystem code - please see that for details.
-
-Semantics
-=========
-
-Each relayfs channel has one buffer per CPU, each buffer has one or
-more sub-buffers. Messages are written to the first sub-buffer until
-it is too full to contain a new message, in which case it it is
-written to the next (if available). Messages are never split across
-sub-buffers. At this point, userspace can be notified so it empties
-the first sub-buffer, while the kernel continues writing to the next.
-
-When notified that a sub-buffer is full, the kernel knows how many
-bytes of it are padding i.e. unused. Userspace can use this knowledge
-to copy only valid data.
-
-After copying it, userspace can notify the kernel that a sub-buffer
-has been consumed.
-
-relayfs can operate in a mode where it will overwrite data not yet
-collected by userspace, and not wait for it to consume it.
-
-relayfs itself does not provide for communication of such data between
-userspace and kernel, allowing the kernel side to remain simple and
-not impose a single interface on userspace. It does provide a set of
-examples and a separate helper though, described below.
-
-klog and relay-apps example code
-================================
-
-relayfs itself is ready to use, but to make things easier, a couple
-simple utility functions and a set of examples are provided.
-
-The relay-apps example tarball, available on the relayfs sourceforge
-site, contains a set of self-contained examples, each consisting of a
-pair of .c files containing boilerplate code for each of the user and
-kernel sides of a relayfs application; combined these two sets of
-boilerplate code provide glue to easily stream data to disk, without
-having to bother with mundane housekeeping chores.
-
-The 'klog debugging functions' patch (klog.patch in the relay-apps
-tarball) provides a couple of high-level logging functions to the
-kernel which allow writing formatted text or raw data to a channel,
-regardless of whether a channel to write into exists or not, or
-whether relayfs is compiled into the kernel or is configured as a
-module. These functions allow you to put unconditional 'trace'
-statements anywhere in the kernel or kernel modules; only when there
-is a 'klog handler' registered will data actually be logged (see the
-klog and kleak examples for details).
-
-It is of course possible to use relayfs from scratch i.e. without
-using any of the relay-apps example code or klog, but you'll have to
-implement communication between userspace and kernel, allowing both to
-convey the state of buffers (full, empty, amount of padding).
-
-klog and the relay-apps examples can be found in the relay-apps
-tarball on http://relayfs.sourceforge.net
-
-
-The relayfs user space API
-==========================
-
-relayfs implements basic file operations for user space access to
-relayfs channel buffer data. Here are the file operations that are
-available and some comments regarding their behavior:
-
-open() enables user to open an _existing_ buffer.
-
-mmap() results in channel buffer being mapped into the caller's
- memory space. Note that you can't do a partial mmap - you must
- map the entire file, which is NRBUF * SUBBUFSIZE.
-
-read() read the contents of a channel buffer. The bytes read are
- 'consumed' by the reader i.e. they won't be available again
- to subsequent reads. If the channel is being used in
- no-overwrite mode (the default), it can be read at any time
- even if there's an active kernel writer. If the channel is
- being used in overwrite mode and there are active channel
- writers, results may be unpredictable - users should make
- sure that all logging to the channel has ended before using
- read() with overwrite mode.
-
-poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are
- notified when sub-buffer boundaries are crossed.
-
-close() decrements the channel buffer's refcount. When the refcount
- reaches 0 i.e. when no process or kernel client has the buffer
- open, the channel buffer is freed.
-
-
-In order for a user application to make use of relayfs files, the
-relayfs filesystem must be mounted. For example,
-
- mount -t relayfs relayfs /mnt/relay
-
-NOTE: relayfs doesn't need to be mounted for kernel clients to create
- or use channels - it only needs to be mounted when user space
- applications need access to the buffer data.
-
-
-The relayfs kernel API
-======================
-
-Here's a summary of the API relayfs provides to in-kernel clients:
-
-
- channel management functions:
-
- relay_open(base_filename, parent, subbuf_size, n_subbufs,
- callbacks)
- relay_close(chan)
- relay_flush(chan)
- relay_reset(chan)
- relayfs_create_dir(name, parent)
- relayfs_remove_dir(dentry)
- relayfs_create_file(name, parent, mode, fops, data)
- relayfs_remove_file(dentry)
-
- channel management typically called on instigation of userspace:
-
- relay_subbufs_consumed(chan, cpu, subbufs_consumed)
-
- write functions:
-
- relay_write(chan, data, length)
- __relay_write(chan, data, length)
- relay_reserve(chan, length)
-
- callbacks:
-
- subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
- buf_mapped(buf, filp)
- buf_unmapped(buf, filp)
- create_buf_file(filename, parent, mode, buf, is_global)
- remove_buf_file(dentry)
-
- helper functions:
-
- relay_buf_full(buf)
- subbuf_start_reserve(buf, length)
-
-
-Creating a channel
-------------------
-
-relay_open() is used to create a channel, along with its per-cpu
-channel buffers. Each channel buffer will have an associated file
-created for it in the relayfs filesystem, which can be opened and
-mmapped from user space if desired. The files are named
-basename0...basenameN-1 where N is the number of online cpus, and by
-default will be created in the root of the filesystem. If you want a
-directory structure to contain your relayfs files, you can create it
-with relayfs_create_dir() and pass the parent directory to
-relay_open(). Clients are responsible for cleaning up any directory
-structure they create when the channel is closed - use
-relayfs_remove_dir() for that.
-
-The total size of each per-cpu buffer is calculated by multiplying the
-number of sub-buffers by the sub-buffer size passed into relay_open().
-The idea behind sub-buffers is that they're basically an extension of
-double-buffering to N buffers, and they also allow applications to
-easily implement random-access-on-buffer-boundary schemes, which can
-be important for some high-volume applications. The number and size
-of sub-buffers is completely dependent on the application and even for
-the same application, different conditions will warrant different
-values for these parameters at different times. Typically, the right
-values to use are best decided after some experimentation; in general,
-though, it's safe to assume that having only 1 sub-buffer is a bad
-idea - you're guaranteed to either overwrite data or lose events
-depending on the channel mode being used.
-
-Channel 'modes'
----------------
-
-relayfs channels can be used in either of two modes - 'overwrite' or
-'no-overwrite'. The mode is entirely determined by the implementation
-of the subbuf_start() callback, as described below. In 'overwrite'
-mode, also known as 'flight recorder' mode, writes continuously cycle
-around the buffer and will never fail, but will unconditionally
-overwrite old data regardless of whether it's actually been consumed.
-In no-overwrite mode, writes will fail i.e. data will be lost, if the
-number of unconsumed sub-buffers equals the total number of
-sub-buffers in the channel. It should be clear that if there is no
-consumer or if the consumer can't consume sub-buffers fast enought,
-data will be lost in either case; the only difference is whether data
-is lost from the beginning or the end of a buffer.
-
-As explained above, a relayfs channel is made of up one or more
-per-cpu channel buffers, each implemented as a circular buffer
-subdivided into one or more sub-buffers. Messages are written into
-the current sub-buffer of the channel's current per-cpu buffer via the
-write functions described below. Whenever a message can't fit into
-the current sub-buffer, because there's no room left for it, the
-client is notified via the subbuf_start() callback that a switch to a
-new sub-buffer is about to occur. The client uses this callback to 1)
-initialize the next sub-buffer if appropriate 2) finalize the previous
-sub-buffer if appropriate and 3) return a boolean value indicating
-whether or not to actually go ahead with the sub-buffer switch.
-
-To implement 'no-overwrite' mode, the userspace client would provide
-an implementation of the subbuf_start() callback something like the
-following:
-
-static int subbuf_start(struct rchan_buf *buf,
- void *subbuf,
- void *prev_subbuf,
- unsigned int prev_padding)
-{
- if (prev_subbuf)
- *((unsigned *)prev_subbuf) = prev_padding;
-
- if (relay_buf_full(buf))
- return 0;
-
- subbuf_start_reserve(buf, sizeof(unsigned int));
-
- return 1;
-}
-
-If the current buffer is full i.e. all sub-buffers remain unconsumed,
-the callback returns 0 to indicate that the buffer switch should not
-occur yet i.e. until the consumer has had a chance to read the current
-set of ready sub-buffers. For the relay_buf_full() function to make
-sense, the consumer is reponsible for notifying relayfs when
-sub-buffers have been consumed via relay_subbufs_consumed(). Any
-subsequent attempts to write into the buffer will again invoke the
-subbuf_start() callback with the same parameters; only when the
-consumer has consumed one or more of the ready sub-buffers will
-relay_buf_full() return 0, in which case the buffer switch can
-continue.
-
-The implementation of the subbuf_start() callback for 'overwrite' mode
-would be very similar:
-
-static int subbuf_start(struct rchan_buf *buf,
- void *subbuf,
- void *prev_subbuf,
- unsigned int prev_padding)
-{
- if (prev_subbuf)
- *((unsigned *)prev_subbuf) = prev_padding;
-
- subbuf_start_reserve(buf, sizeof(unsigned int));
-
- return 1;
-}
-
-In this case, the relay_buf_full() check is meaningless and the
-callback always returns 1, causing the buffer switch to occur
-unconditionally. It's also meaningless for the client to use the
-relay_subbufs_consumed() function in this mode, as it's never
-consulted.
-
-The default subbuf_start() implementation, used if the client doesn't
-define any callbacks, or doesn't define the subbuf_start() callback,
-implements the simplest possible 'no-overwrite' mode i.e. it does
-nothing but return 0.
-
-Header information can be reserved at the beginning of each sub-buffer
-by calling the subbuf_start_reserve() helper function from within the
-subbuf_start() callback. This reserved area can be used to store
-whatever information the client wants. In the example above, room is
-reserved in each sub-buffer to store the padding count for that
-sub-buffer. This is filled in for the previous sub-buffer in the
-subbuf_start() implementation; the padding value for the previous
-sub-buffer is passed into the subbuf_start() callback along with a
-pointer to the previous sub-buffer, since the padding value isn't
-known until a sub-buffer is filled. The subbuf_start() callback is
-also called for the first sub-buffer when the channel is opened, to
-give the client a chance to reserve space in it. In this case the
-previous sub-buffer pointer passed into the callback will be NULL, so
-the client should check the value of the prev_subbuf pointer before
-writing into the previous sub-buffer.
-
-Writing to a channel
---------------------
-
-kernel clients write data into the current cpu's channel buffer using
-relay_write() or __relay_write(). relay_write() is the main logging
-function - it uses local_irqsave() to protect the buffer and should be
-used if you might be logging from interrupt context. If you know
-you'll never be logging from interrupt context, you can use
-__relay_write(), which only disables preemption. These functions
-don't return a value, so you can't determine whether or not they
-failed - the assumption is that you wouldn't want to check a return
-value in the fast logging path anyway, and that they'll always succeed
-unless the buffer is full and no-overwrite mode is being used, in
-which case you can detect a failed write in the subbuf_start()
-callback by calling the relay_buf_full() helper function.
-
-relay_reserve() is used to reserve a slot in a channel buffer which
-can be written to later. This would typically be used in applications
-that need to write directly into a channel buffer without having to
-stage data in a temporary buffer beforehand. Because the actual write
-may not happen immediately after the slot is reserved, applications
-using relay_reserve() can keep a count of the number of bytes actually
-written, either in space reserved in the sub-buffers themselves or as
-a separate array. See the 'reserve' example in the relay-apps tarball
-at http://relayfs.sourceforge.net for an example of how this can be
-done. Because the write is under control of the client and is
-separated from the reserve, relay_reserve() doesn't protect the buffer
-at all - it's up to the client to provide the appropriate
-synchronization when using relay_reserve().
-
-Closing a channel
------------------
-
-The client calls relay_close() when it's finished using the channel.
-The channel and its associated buffers are destroyed when there are no
-longer any references to any of the channel buffers. relay_flush()
-forces a sub-buffer switch on all the channel buffers, and can be used
-to finalize and process the last sub-buffers before the channel is
-closed.
-
-Creating non-relay files
-------------------------
-
-relay_open() automatically creates files in the relayfs filesystem to
-represent the per-cpu kernel buffers; it's often useful for
-applications to be able to create their own files alongside the relay
-files in the relayfs filesystem as well e.g. 'control' files much like
-those created in /proc or debugfs for similar purposes, used to
-communicate control information between the kernel and user sides of a
-relayfs application. For this purpose the relayfs_create_file() and
-relayfs_remove_file() API functions exist. For relayfs_create_file(),
-the caller passes in a set of user-defined file operations to be used
-for the file and an optional void * to a user-specified data item,
-which will be accessible via inode->u.generic_ip (see the relay-apps
-tarball for examples). The file_operations are a required parameter
-to relayfs_create_file() and thus the semantics of these files are
-completely defined by the caller.
-
-See the relay-apps tarball at http://relayfs.sourceforge.net for
-examples of how these non-relay files are meant to be used.
-
-Creating relay files in other filesystems
------------------------------------------
-
-By default of course, relay_open() creates relay files in the relayfs
-filesystem. Because relay_file_operations is exported, however, it's
-also possible to create and use relay files in other pseudo-filesytems
-such as debugfs.
-
-For this purpose, two callback functions are provided,
-create_buf_file() and remove_buf_file(). create_buf_file() is called
-once for each per-cpu buffer from relay_open() to allow the client to
-create a file to be used to represent the corresponding buffer; if
-this callback is not defined, the default implementation will create
-and return a file in the relayfs filesystem to represent the buffer.
-The callback should return the dentry of the file created to represent
-the relay buffer. Note that the parent directory passed to
-relay_open() (and passed along to the callback), if specified, must
-exist in the same filesystem the new relay file is created in. If
-create_buf_file() is defined, remove_buf_file() must also be defined;
-it's responsible for deleting the file(s) created in create_buf_file()
-and is called during relay_close().
-
-The create_buf_file() implementation can also be defined in such a way
-as to allow the creation of a single 'global' buffer instead of the
-default per-cpu set. This can be useful for applications interested
-mainly in seeing the relative ordering of system-wide events without
-the need to bother with saving explicit timestamps for the purpose of
-merging/sorting per-cpu files in a postprocessing step.
-
-To have relay_open() create a global buffer, the create_buf_file()
-implementation should set the value of the is_global outparam to a
-non-zero value in addition to creating the file that will be used to
-represent the single buffer. In the case of a global buffer,
-create_buf_file() and remove_buf_file() will be called only once. The
-normal channel-writing functions e.g. relay_write() can still be used
-- writes from any cpu will transparently end up in the global buffer -
-but since it is a global buffer, callers should make sure they use the
-proper locking for such a buffer, either by wrapping writes in a
-spinlock, or by copying a write function from relayfs_fs.h and
-creating a local version that internally does the proper locking.
-
-See the 'exported-relayfile' examples in the relay-apps tarball for
-examples of creating and using relay files in debugfs.
-
-Misc
-----
-
-Some applications may want to keep a channel around and re-use it
-rather than open and close a new channel for each use. relay_reset()
-can be used for this purpose - it resets a channel to its initial
-state without reallocating channel buffer memory or destroying
-existing mappings. It should however only be called when it's safe to
-do so i.e. when the channel isn't currently being written to.
-
-Finally, there are a couple of utility callbacks that can be used for
-different purposes. buf_mapped() is called whenever a channel buffer
-is mmapped from user space and buf_unmapped() is called when it's
-unmapped. The client can use this notification to trigger actions
-within the kernel application, such as enabling/disabling logging to
-the channel.
-
-
-Resources
-=========
-
-For news, example code, mailing list, etc. see the relayfs homepage:
-
- http://relayfs.sourceforge.net
-
-
-Credits
-=======
-
-The ideas and specs for relayfs came about as a result of discussions
-on tracing involving the following:
-
-Michel Dagenais <michel.dagenais@polymtl.ca>
-Richard Moore <richardj_moore@uk.ibm.com>
-Bob Wisniewski <bob@watson.ibm.com>
-Karim Yaghmour <karim@opersys.com>
-Tom Zanussi <zanussi@us.ibm.com>
-
-Also thanks to Hubertus Franke for a lot of useful suggestions and bug
-reports.
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 0b62c62142c..5c3a5190596 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -25,6 +25,7 @@ Currently, these files are in /proc/sys/fs:
- inode-state
- overflowuid
- overflowgid
+- suid_dumpable
- super-max
- super-nr
@@ -131,6 +132,25 @@ The default is 65534.
==============================================================
+suid_dumpable:
+
+This value can be used to query and set the core dump mode for setuid
+or otherwise protected/tainted binaries. The modes are
+
+0 - (default) - traditional behaviour. Any process which has changed
+ privilege levels or is execute only will not be dumped
+1 - (debug) - all processes dump core when possible. The core dump is
+ owned by the current user and no security is applied. This is
+ intended for system debugging situations only. Ptrace is unchecked.
+2 - (suidsafe) - any binary which normally would not be dumped is dumped
+ readable by root only. This allows the end user to remove
+ such a dump but not access it directly. For security reasons
+ core dumps in this mode will not overwrite one another or
+ other files. This mode is appropriate when adminstrators are
+ attempting to debug problems in a normal environment.
+
+==============================================================
+
super-max & super-nr:
These numbers control the maximum number of superblocks, and
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 7345c338080..89bf8c20a58 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -50,7 +50,6 @@ show up in /proc/sys/kernel:
- shmmax [ sysv ipc ]
- shmmni
- stop-a [ SPARC only ]
-- suid_dumpable
- sysrq ==> Documentation/sysrq.txt
- tainted
- threads-max
@@ -310,25 +309,6 @@ kernel. This value defaults to SHMMAX.
==============================================================
-suid_dumpable:
-
-This value can be used to query and set the core dump mode for setuid
-or otherwise protected/tainted binaries. The modes are
-
-0 - (default) - traditional behaviour. Any process which has changed
- privilege levels or is execute only will not be dumped
-1 - (debug) - all processes dump core when possible. The core dump is
- owned by the current user and no security is applied. This is
- intended for system debugging situations only. Ptrace is unchecked.
-2 - (suidsafe) - any binary which normally would not be dumped is dumped
- readable by root only. This allows the end user to remove
- such a dump but not access it directly. For security reasons
- core dumps in this mode will not overwrite one another or
- other files. This mode is appropriate when adminstrators are
- attempting to debug problems in a normal environment.
-
-==============================================================
-
tainted:
Non-zero if the kernel has been tainted. Numeric values, which
diff --git a/Makefile b/Makefile
index 8406d02c638..33559b56644 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 18
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
NAME=Crazed Snow-Weasel
# *DOCUMENTATION*
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index f71fb4a029c..b2751eadbc5 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -142,6 +142,7 @@ config X86_SUMMIT
In particular, it is needed for the x440.
If you don't have one of these computers, you should say N here.
+ If you want to build a NUMA kernel, you must select ACPI.
config X86_BIGSMP
bool "Support for other sub-arch SMP systems with more than 8 CPUs"
@@ -169,6 +170,7 @@ config X86_GENERICARCH
help
This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
It is intended for a generic binary kernel.
+ If you want a NUMA kernel, select ACPI. We need SRAT for NUMA.
config X86_ES7000
bool "Support for Unisys ES7000 IA32 series"
@@ -542,7 +544,7 @@ config X86_PAE
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation and Scheduler Support"
- depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI))
+ depends on SMP && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI)
default n if X86_PC
default y if (X86_NUMAQ || X86_SUMMIT)
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index efb41e81351..e6ea00edcb5 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -567,16 +567,11 @@ static struct cpufreq_driver acpi_cpufreq_driver = {
static int __init
acpi_cpufreq_init (void)
{
- int result = 0;
-
dprintk("acpi_cpufreq_init\n");
- result = acpi_cpufreq_early_init_acpi();
+ acpi_cpufreq_early_init_acpi();
- if (!result)
- result = cpufreq_register_driver(&acpi_cpufreq_driver);
-
- return (result);
+ return cpufreq_register_driver(&acpi_cpufreq_driver);
}
diff --git a/drivers/base/node.c b/drivers/base/node.c
index d7de1753e09..e9b0957f15d 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -64,7 +64,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
"Node %d Mapped: %8lu kB\n"
"Node %d AnonPages: %8lu kB\n"
"Node %d PageTables: %8lu kB\n"
- "Node %d NFS Unstable: %8lu kB\n"
+ "Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
"Node %d Slab: %8lu kB\n",
nid, K(i.totalram),
diff --git a/drivers/cdrom/gscd.c b/drivers/cdrom/gscd.c
index b6ee50a2916..fa708248976 100644
--- a/drivers/cdrom/gscd.c
+++ b/drivers/cdrom/gscd.c
@@ -266,7 +266,7 @@ repeat:
goto out;
if (req->cmd != READ) {
- printk("GSCD: bad cmd %lu\n", rq_data_dir(req));
+ printk("GSCD: bad cmd %u\n", rq_data_dir(req));
end_request(req, 0);
goto repeat;
}
diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index 4ea7bd5f4f5..a369dd6877d 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -142,6 +142,7 @@ typedef struct _moxa_board_conf {
static moxa_board_conf moxa_boards[MAX_BOARDS];
static void __iomem *moxaBaseAddr[MAX_BOARDS];
+static int loadstat[MAX_BOARDS];
struct moxa_str {
int type;
@@ -1688,6 +1689,8 @@ int MoxaDriverPoll(void)
if (moxaCard == 0)
return (-1);
for (card = 0; card < MAX_BOARDS; card++) {
+ if (loadstat[card] == 0)
+ continue;
if ((ports = moxa_boards[card].numPorts) == 0)
continue;
if (readb(moxaIntPend[card]) == 0xff) {
@@ -2903,6 +2906,7 @@ static int moxaloadcode(int cardno, unsigned char __user *tmp, int len)
}
break;
}
+ loadstat[cardno] = 1;
return (0);
}
@@ -2920,7 +2924,7 @@ static int moxaloadc218(int cardno, void __iomem *baseAddr, int len)
len1 = len >> 1;
ptr = (ushort *) moxaBuff;
for (i = 0; i < len1; i++)
- usum += *(ptr + i);
+ usum += le16_to_cpu(*(ptr + i));
retry = 0;
do {
len1 = len >> 1;
@@ -2992,7 +2996,7 @@ static int moxaloadc320(int cardno, void __iomem *baseAddr, int len, int *numPor
wlen = len >> 1;
uptr = (ushort *) moxaBuff;
for (i = 0; i < wlen; i++)
- usum += uptr[i];
+ usum += le16_to_cpu(uptr[i]);
retry = 0;
j = 0;
do {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index bfdb90242a9..bb0d9199e99 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -153,6 +153,15 @@ int tty_ioctl(struct inode * inode, struct file * file,
static int tty_fasync(int fd, struct file * filp, int on);
static void release_mem(struct tty_struct *tty, int idx);
+/**
+ * alloc_tty_struct - allocate a tty object
+ *
+ * Return a new empty tty structure. The data fields have not
+ * been initialized in any way but has been zeroed
+ *
+ * Locking: none
+ * FIXME: use kzalloc
+ */
static struct tty_struct *alloc_tty_struct(void)
{
@@ -166,6 +175,15 @@ static struct tty_struct *alloc_tty_struct(void)
static void tty_buffer_free_all(struct tty_struct *);
+/**
+ * free_tty_struct - free a disused tty
+ * @tty: tty struct to free
+ *
+ * Free the write buffers, tty queue and tty memory itself.
+ *
+ * Locking: none. Must be called after tty is definitely unused
+ */
+
static inline void free_tty_struct(struct tty_struct *tty)
{
kfree(tty->write_buf);
@@ -175,6 +193,17 @@ static inline void free_tty_struct(struct tty_struct *tty)
#define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base)
+/**
+ * tty_name - return tty naming
+ * @tty: tty structure
+ * @buf: buffer for output
+ *
+ * Convert a tty structure into a name. The name reflects the kernel
+ * naming policy and if udev is in use may not reflect user space
+ *
+ * Locking: none
+ */
+
char *tty_name(struct tty_struct *tty, char *buf)
{
if (!tty) /* Hmm. NULL pointer. That's fun. */
@@ -235,6 +264,28 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
* Tty buffer allocation management
*/
+
+/**
+ * tty_buffer_free_all - free buffers used by a tty
+ * @tty: tty to free from
+ *
+ * Remove all the buffers pending on a tty whether queued with data
+ * or in the free ring. Must be called when the tty is no longer in use
+ *
+ * Locking: none
+ */
+
+
+/**
+ * tty_buffer_free_all - free buffers used by a tty
+ * @tty: tty to free from
+ *
+ * Remove all the buffers pending on a tty whether queued with data
+ * or in the free ring. Must be called when the tty is no longer in use
+ *
+ * Locking: none
+ */
+
static void tty_buffer_free_all(struct tty_struct *tty)
{
struct tty_buffer *thead;
@@ -247,19 +298,47 @@ static void tty_buffer_free_all(struct tty_struct *tty)
kfree(thead);
}
tty->buf.tail = NULL;
+ tty->buf.memory_used = 0;
}
+/**
+ * tty_buffer_init - prepare a tty buffer structure
+ * @tty: tty to initialise
+ *
+ * Set up the initial state of the buffer management for a tty device.
+ * Must be called before the other tty buffer functions are used.
+ *
+ * Locking: none
+ */
+
static void tty_buffer_init(struct tty_struct *tty)
{
spin_lock_init(&tty->buf.lock);
tty->buf.head = NULL;
tty->buf.tail = NULL;
tty->buf.free = NULL;
+ tty->buf.memory_used = 0;
}
-static struct tty_buffer *tty_buffer_alloc(size_t size)
+/**
+ * tty_buffer_alloc - allocate a tty buffer
+ * @tty: tty device
+ * @size: desired size (characters)
+ *
+ * Allocate a new tty buffer to hold the desired number of characters.
+ * Return NULL if out of memory or the allocation would exceed the
+ * per device queue
+ *
+ * Locking: Caller must hold tty->buf.lock
+ */
+
+static struct tty_buffer *tty_buffer_alloc(struct tty_struct *tty, size_t size)
{
- struct tty_buffer *p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
+ struct tty_buffer *p;
+
+ if (tty->buf.memory_used + size > 65536)
+ return NULL;
+ p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
if(p == NULL)
return NULL;
p->used = 0;
@@ -269,17 +348,27 @@ static struct tty_buffer *tty_buffer_alloc(size_t size)
p->read = 0;
p->char_buf_ptr = (char *)(p->data);
p->flag_buf_ptr = (unsigned char *)p->char_buf_ptr + size;
-/* printk("Flip create %p\n", p); */
+ tty->buf.memory_used += size;
return p;
}
-/* Must be called with the tty_read lock held. This needs to acquire strategy
- code to decide if we should kfree or relink a given expired buffer */
+/**
+ * tty_buffer_free - free a tty buffer
+ * @tty: tty owning the buffer
+ * @b: the buffer to free
+ *
+ * Free a tty buffer, or add it to the free list according to our
+ * internal strategy
+ *
+ * Locking: Caller must hold tty->buf.lock
+ */
static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b)
{
/* Dumb strategy for now - should keep some stats */
-/* printk("Flip dispose %p\n", b); */
+ tty->buf.memory_used -= b->size;
+ WARN_ON(tty->buf.memory_used < 0);
+
if(b->size >= 512)
kfree(b);
else {
@@ -288,6 +377,18 @@ static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b)
}
}
+/**
+ * tty_buffer_find - find a free tty buffer
+ * @tty: tty owning the buffer
+ * @size: characters wanted
+ *
+ * Locate an existing suitable tty buffer or if we are lacking one then
+ * allocate a new one. We round our buffers off in 256 character chunks
+ * to get better allocation behaviour.
+ *
+ * Locking: Caller must hold tty->buf.lock
+ */
+
static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size)
{
struct tty_buffer **tbh = &tty->buf.free;
@@ -299,20 +400,28 @@ static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size)
t->used = 0;
t->commit = 0;
t->read = 0;
- /* DEBUG ONLY */
-/* memset(t->data, '*', size); */
-/* printk("Flip recycle %p\n", t); */
+ tty->buf.memory_used += t->size;
return t;
}
tbh = &((*tbh)->next);
}
/* Round the buffer size out */
size = (size + 0xFF) & ~ 0xFF;
- return tty_buffer_alloc(size);
+ return tty_buffer_alloc(tty, size);
/* Should possibly check if this fails for the largest buffer we
have queued and recycle that ? */
}
+/**
+ * tty_buffer_request_room - grow tty buffer if needed
+ * @tty: tty structure
+ * @size: size desired
+ *
+ * Make at least size bytes of linear space available for the tty
+ * buffer. If we fail return the size we managed to find.
+ *
+ * Locking: Takes tty->buf.lock
+ */
int tty_buffer_request_room(struct tty_struct *tty, size_t size)
{
struct tty_buffer *b, *n;
@@ -347,6 +456,18 @@ int tty_buffer_request_room(struct tty_struct *tty, size_t size)
}
EXPORT_SYMBOL_GPL(tty_buffer_request_room);
+/**
+ * tty_insert_flip_string - Add characters to the tty buffer
+ * @tty: tty structure
+ * @chars: characters
+ * @size: size
+ *
+ * Queue a series of bytes to the tty buffering. All the characters
+ * passed are marked as without error. Returns the number added.
+ *
+ * Locking: Called functions may take tty->buf.lock
+ */
+
int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars,
size_t size)
{
@@ -370,6 +491,20 @@ int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars,
}
EXPORT_SYMBOL(tty_insert_flip_string);
+/**
+ * tty_insert_flip_string_flags - Add characters to the tty buffer
+ * @tty: tty structure
+ * @chars: characters
+ * @flags: flag bytes
+ * @size: size
+ *
+ * Queue a series of bytes to the tty buffering. For each character
+ * the flags array indicates the status of the character. Returns the
+ * number added.
+ *
+ * Locking: Called functions may take tty->buf.lock
+ */
+
int tty_insert_flip_string_flags(struct tty_struct *tty,
const unsigned char *chars, const char *flags, size_t size)
{
@@ -394,6 +529,17 @@ int tty_insert_flip_string_flags(struct tty_struct *tty,
}
EXPORT_SYMBOL(tty_insert_flip_string_flags);
+/**
+ * tty_schedule_flip - push characters to ldisc
+ * @tty: tty to push from
+ *
+ * Takes any pending buffers and transfers their ownership to the
+ * ldisc side of the queue. It then schedules those characters for
+ * processing by the line discipline.
+ *
+ * Locking: Takes tty->buf.lock
+ */
+
void tty_schedule_flip(struct tty_struct *tty)
{
unsigned long flags;
@@ -405,12 +551,19 @@ void tty_schedule_flip(struct tty_struct *tty)
}
EXPORT_SYMBOL(tty_schedule_flip);
-/*
+/**
+ * tty_prepare_flip_string - make room for characters
+ * @tty: tty
+ * @chars: return pointer for character write area
+ * @size: desired size
+ *
* Prepare a block of space in the buffer for data. Returns the length
* available and buffer pointer to the space which is now allocated and
* accounted for as ready for normal characters. This is used for drivers
* that need their own block copy routines into the buffer. There is no
* guarantee the buffer is a DMA target!
+ *
+ * Locking: May call functions taking tty->buf.lock
*/
int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size)
@@ -427,12 +580,20 @@ int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_
EXPORT_SYMBOL_GPL(tty_prepare_flip_string);
-/*
+/**
+ * tty_prepare_flip_string_flags - make room for characters
+ * @tty: tty
+ * @chars: return pointer for character write area
+ * @flags: return pointer for status flag write area
+ * @size: desired size
+ *
* Prepare a block of space in the buffer for data. Returns the length
* available and buffer pointer to the space which is now allocated and
* accounted for as ready for characters. This is used for drivers
* that need their own block copy routines into the buffer. There is no
* guarantee the buffer is a DMA target!
+ *
+ * Locking: May call functions taking tty->buf.lock
*/
int tty_prepare_flip_string_flags(struct tty_struct *tty, unsigned char **chars, char **flags, size_t size)
@@ -451,10 +612,16 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string_flags);
-/*
+/**
+ * tty_set_termios_ldisc - set ldisc field
+ * @tty: tty structure
+ * @num: line discipline number
+ *
* This is probably overkill for real world processors but
* they are not on hot paths so a little discipline won't do
* any harm.
+ *
+ * Locking: takes termios_sem
*/
static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
@@ -474,6 +641,19 @@ static DEFINE_SPINLOCK(tty_ldisc_lock);
static DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait);
static struct tty_ldisc tty_ldiscs[NR_LDISCS]; /* line disc dispatch table */
+/**
+ * tty_register_ldisc - install a line discipline
+ * @disc: ldisc number
+ * @new_ldisc: pointer to the ldisc object
+ *
+ * Installs a new line discipline into the kernel. The discipline
+ * is set up as unreferenced and then made available to the kernel
+ * from this point onwards.
+ *
+ * Locking:
+ * takes tty_ldisc_lock to guard against ldisc races
+ */
+
int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
{
unsigned long flags;
@@ -493,6 +673,18 @@ int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
}
EXPORT_SYMBOL(tty_register_ldisc);
+/**
+ * tty_unregister_ldisc - unload a line discipline
+ * @disc: ldisc number
+ * @new_ldisc: pointer to the ldisc object
+ *
+ * Remove a line discipline from the kernel providing it is not
+ * currently in use.
+ *
+ * Locking:
+ * takes tty_ldisc_lock to guard against ldisc races
+ */
+
int tty_unregister_ldisc(int disc)
{
unsigned long flags;
@@ -512,6 +704,19 @@ int tty_unregister_ldisc(int disc)
}
EXPORT_SYMBOL(tty_unregister_ldisc);
+/**
+ * tty_ldisc_get - take a reference to an ldisc
+ * @disc: ldisc number
+ *
+ * Takes a reference to a line discipline. Deals with refcounts and
+ * module locking counts. Returns NULL if the discipline is not available.
+ * Returns a pointer to the discipline and bumps the ref count if it is
+ * available
+ *
+ * Locking:
+ * takes tty_ldisc_lock to guard against ldisc races
+ */
+
struct tty_ldisc *tty_ldisc_get(int disc)
{
unsigned long flags;
@@ -540,6 +745,17 @@ struct tty_ldisc *tty_ldisc_get(int disc)
EXPORT_SYMBOL_GPL(tty_ldisc_get);
+/**
+ * tty_ldisc_put - drop ldisc reference
+ * @disc: ldisc number
+ *
+ * Drop a reference to a line discipline. Manage refcounts and
+ * module usage counts
+ *
+ * Locking:
+ * takes tty_ldisc_lock to guard against ldisc races
+ */
+
void tty_ldisc_put(int disc)
{
struct tty_ldisc *ld;
@@ -557,6 +773,19 @@ void tty_ldisc_put(int disc)
EXPORT_SYMBOL_GPL(tty_ldisc_put);
+/**
+ * tty_ldisc_assign - set ldisc on a tty
+ * @tty: tty to assign
+ * @ld: line discipline
+ *
+ * Install an instance of a line discipline into a tty structure. The
+ * ldisc must have a reference count above zero to ensure it remains/
+ * The tty instance refcount starts at zero.
+ *
+ * Locking:
+ * Caller must hold references
+ */
+
static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
{
tty->ldisc = *ld;
@@ -571,6 +800,8 @@ static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
* the tty ldisc. Return 0 on failure or 1 on success. This is
* used to implement both the waiting and non waiting versions
* of tty_ldisc_ref
+ *
+ * Locking: takes tty_ldisc_lock
*/
static int tty_ldisc_try(struct tty_struct *tty)
@@ -602,6 +833,8 @@ static int tty_ldisc_try(struct tty_struct *tty)
* must also be careful not to hold other locks that will deadlock
* against a discipline change, such as an existing ldisc reference
* (which we check for)
+ *
+ * Locking: call functions take tty_ldisc_lock
*/
struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
@@ -622,6 +855,8 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
* Dereference the line discipline for the terminal and take a
* reference to it. If the line discipline is in flux then
* return NULL. Can be called from IRQ and timer functions.
+ *
+ * Locking: called functions take tty_ldisc_lock
*/
struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty)
@@ -639,6 +874,8 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref);
*
* Undoes the effect of tty_ldisc_ref or tty_ldisc_ref_wait. May
* be called in IRQ context.
+ *
+ * Locking: takes tty_ldisc_lock
*/
void tty_ldisc_deref(struct tty_ldisc *ld)
@@ -683,6 +920,9 @@ static void tty_ldisc_enable(struct tty_struct *tty)
*
* Set the discipline of a tty line. Must be called from a process
* context.
+ *
+ * Locking: takes tty_ldisc_lock.
+ * called functions take termios_sem
*/
static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
@@ -846,9 +1086,17 @@ restart:
return retval;
}
-/*
- * This routine returns a tty driver structure, given a device number
+/**
+ * get_tty_driver - find device of a tty
+ * @dev_t: device identifier
+ * @index: returns the index of the tty
+ *
+ * This routine returns a tty driver structure, given a device number
+ * and also passes back the index number.
+ *
+ * Locking: caller must hold tty_mutex
*/
+
static struct tty_driver *get_tty_driver(dev_t device, int *index)
{
struct tty_driver *p;
@@ -863,11 +1111,17 @@ static struct tty_driver *get_tty_driver(dev_t device, int *index)
return NULL;
}
-/*
- * If we try to write to, or set the state of, a terminal and we're
- * not in the foreground, send a SIGTTOU. If the signal is blocked or
- * ignored, go ahead and perform the operation. (POSIX 7.2)
+/**
+ * tty_check_change - check for POSIX terminal changes
+ * @tty: tty to check
+ *
+ * If we try to write to, or set the state of, a terminal and we're
+ * not in the foreground, send a SIGTTOU. If the signal is blocked or
+ * ignored, go ahead and perform the operation. (POSIX 7.2)
+ *
+ * Locking: none
*/
+
int tty_check_change(struct tty_struct * tty)
{
if (current->signal->tty != tty)
@@ -1005,10 +1259,27 @@ void tty_ldisc_flush(struct tty_struct *tty)
EXPORT_SYMBOL_GPL(tty_ldisc_flush);
-/*
- * This can be called by the "eventd" kernel thread. That is process synchronous,
- * but doesn't hold any locks, so we need to make sure we have the appropriate
- * locks for what we're doing..
+/**
+ * do_tty_hangup - actual handler for hangup events
+ * @data: tty device
+ *
+ * This can be called by the "eventd" kernel thread. That is process
+ * synchronous but doesn't hold any locks, so we need to make sure we
+ * have the appropriate locks for what we're doing.
+ *
+ * The hangup event clears any pending redirections onto the hung up
+ * device. It ensures future writes will error and it does the needed
+ * line discipline hangup and signal delivery. The tty object itself
+ * remains intact.
+ *
+ * Locking:
+ * BKL
+ * redirect lock for undoing redirection
+ * file list lock for manipulating list of ttys
+ * tty_ldisc_lock from called functions
+ * termios_sem resetting termios data
+ * tasklist_lock to walk task list for hangup event
+ *
*/
static void do_tty_hangup(void *data)
{
@@ -1133,6 +1404,14 @@ static void do_tty_hangup(void *data)
fput(f);
}
+/**
+ * tty_hangup - trigger a hangup event
+ * @tty: tty to hangup
+ *
+ * A carrier loss (virtual or otherwise) has occurred on this like
+ * schedule a hangup sequence to run after this event.
+ */
+
void tty_hangup(struct tty_struct * tty)
{
#ifdef TTY_DEBUG_HANGUP
@@ -1145,6 +1424,15 @@ void tty_hangup(struct tty_struct * tty)
EXPORT_SYMBOL(tty_hangup);
+/**
+ * tty_vhangup - process vhangup
+ * @tty: tty to hangup
+ *
+ * The user has asked via system call for the terminal to be hung up.
+ * We do this synchronously so that when the syscall returns the process
+ * is complete. That guarantee is neccessary for security reasons.
+ */
+
void tty_vhangup(struct tty_struct * tty)
{
#ifdef TTY_DEBUG_HANGUP
@@ -1156,6 +1444,14 @@ void tty_vhangup(struct tty_struct * tty)
}
EXPORT_SYMBOL(tty_vhangup);
+/**
+ * tty_hung_up_p - was tty hung up
+ * @filp: file pointer of tty
+ *
+ * Return true if the tty has been subject to a vhangup or a carrier
+ * loss
+ */
+
int tty_hung_up_p(struct file * filp)
{
return (filp->f_op == &hung_up_tty_fops);
@@ -1163,19 +1459,28 @@ int tty_hung_up_p(struct file * filp)
EXPORT_SYMBOL(tty_hung_up_p);
-/*
- * This function is typically called only by the session leader, when
- * it wants to disassociate itself from its controlling tty.
+/**
+ * disassociate_ctty - disconnect controlling tty
+ * @on_exit: true if exiting so need to "hang up" the session
+ *
+ * This function is typically called only by the session leader, when
+ * it wants to disassociate itself from its controlling tty.
*
- * It performs the following functions:
+ * It performs the following functions:
* (1) Sends a SIGHUP and SIGCONT to the foreground process group
* (2) Clears the tty from being controlling the session
* (3) Clears the controlling tty for all processes in the
* session group.
*
- * The argument on_exit is set to 1 if called when a process is
- * exiting; it is 0 if called by the ioctl TIOCNOTTY.
+ * The argument on_exit is set to 1 if called when a process is
+ * exiting; it is 0 if called by the ioctl TIOCNOTTY.
+ *
+ * Locking: tty_mutex is taken to protect current->signal->tty
+ * BKL is taken for hysterical raisins
+ * Tasklist lock is taken (under tty_mutex) to walk process
+ * lists for the session.
*/
+
void disassociate_ctty(int on_exit)
{
struct tty_struct *tty;
@@ -1222,6 +1527,25 @@ void disassociate_ctty(int on_exit)
unlock_kernel();
}
+
+/**
+ * stop_tty - propogate flow control
+ * @tty: tty to stop
+ *
+ * Perform flow control to the driver. For PTY/TTY pairs we
+ * must also propogate the TIOCKPKT status. May be called
+ * on an already stopped device and will not re-call the driver
+ * method.
+ *
+ * This functionality is used by both the line disciplines for
+ * halting incoming flow and by the driver. It may therefore be
+ * called from any context, may be under the tty atomic_write_lock
+ * but not always.
+ *
+ * Locking:
+ * Broken. Relies on BKL which is unsafe here.
+ */
+
void stop_tty(struct tty_struct *tty)
{
if (tty->stopped)
@@ -1238,6 +1562,19 @@ void stop_tty(struct tty_struct *tty)
EXPORT_SYMBOL(stop_tty);
+/**
+ * start_tty - propogate flow control
+ * @tty: tty to start
+ *
+ * Start a tty that has been stopped if at all possible. Perform
+ * any neccessary wakeups and propogate the TIOCPKT status. If this
+ * is the tty was previous stopped and is being started then the
+ * driver start method is invoked and the line discipline woken.
+ *
+ * Locking:
+ * Broken. Relies on BKL which is unsafe here.
+ */
+
void start_tty(struct tty_struct *tty)
{
if (!tty->stopped || tty->flow_stopped)
@@ -1258,6 +1595,23 @@ void start_tty(struct tty_struct *tty)
EXPORT_SYMBOL(start_tty);
+/**
+ * tty_read - read method for tty device files
+ * @file: pointer to tty file
+ * @buf: user buffer
+ * @count: size of user buffer
+ * @ppos: unused
+ *
+ * Perform the read system call function on this terminal device. Checks
+ * for hung up devices before calling the line discipline method.
+ *
+ * Locking:
+ * Locks the line discipline internally while needed
+ * For historical reasons the line discipline read method is
+ * invoked under the BKL. This will go away in time so do not rely on it
+ * in new code. Multiple read calls may be outstanding in parallel.
+ */
+
static ssize_t tty_read(struct file * file, char __user * buf, size_t count,
loff_t *ppos)
{
@@ -1302,6 +1656,7 @@ static inline ssize_t do_tty_write(
ssize_t ret = 0, written = 0;
unsigned int chunk;
+ /* FIXME: O_NDELAY ... */
if (mutex_lock_interruptible(&tty->atomic_write_lock)) {
return -ERESTARTSYS;
}
@@ -1318,6 +1673,9 @@ static inline ssize_t do_tty_write(
* layer has problems with bigger chunks. It will
* claim to be able to handle more characters than
* it actually does.
+ *
+ * FIXME: This can probably go away now except that 64K chunks
+ * are too likely to fail unless switched to vmalloc...
*/
chunk = 2048;
if (test_bit(TTY_NO_WRITE_SPLIT, &tty->flags))
@@ -1375,6 +1733,24 @@ static inline ssize_t do_tty_write(
}
+/**
+ * tty_write - write method for tty device file
+ * @file: tty file pointer
+ * @buf: user data to write
+ * @count: bytes to write
+ * @ppos: unused
+ *
+ * Write data to a tty device via the line discipline.
+ *
+ * Locking:
+ * Locks the line discipline as required
+ * Writes to the tty driver are serialized by the atomic_write_lock
+ * and are then processed in chunks to the device. The line discipline
+ * write method will not be involked in parallel for each device
+ * The line discipline write method is called under the big
+ * kernel lock for historical reasons. New code should not rely on this.
+ */
+
static ssize_t tty_write(struct file * file, const char __user * buf, size_t count,
loff_t *ppos)
{
@@ -1422,7 +1798,18 @@ ssize_t redirected_tty_write(struct file * file, const char __user * buf, size_t
static char ptychar[] = "pqrstuvwxyzabcde";
-static inline void pty_line_name(struct tty_driver *driver, int index, char *p)
+/**
+ * pty_line_name - generate name for a pty
+ * @driver: the tty driver in use
+ * @index: the minor number
+ * @p: output buffer of at least 6 bytes
+ *
+ * Generate a name from a driver reference and write it to the output
+ * buffer.
+ *
+ * Locking: None
+ */
+static void pty_line_name(struct tty_driver *driver, int index, char *p)
{
int i = index + driver->name_base;
/* ->name is initialized to "ttyp", but "tty" is expected */
@@ -1431,24 +1818,53 @@ static inline void pty_line_name(struct tty_driver *driver, int index, char *p)
ptychar[i >> 4 & 0xf], i & 0xf);
}
-static inline void tty_line_name(struct tty_driver *driver, int index, char *p)
+/**
+ * pty_line_name - generate name for a tty
+ * @driver: the tty driver in use
+ * @index: the minor number
+ * @p: output buffer of at least 7 bytes
+ *
+ * Generate a name from a driver reference and write it to the output
+ * buffer.
+ *
+ * Locking: None
+ */
+static void tty_line_name(struct tty_driver *driver, int index, char *p)
{
sprintf(p, "%s%d", driver->name, index + driver->name_base);
}
-/*
+/**
+ * init_dev - initialise a tty device
+ * @driver: tty driver we are opening a device on
+ * @idx: device index
+ * @tty: returned tty structure
+ *
+ * Prepare a tty device. This may not be a "new" clean device but
+ * could also be an active device. The pty drivers require special
+ * handling because of this.
+ *
+ * Locking:
+ * The function is called under the tty_mutex, which
+ * protects us from the tty struct or driver itself going away.
+ *
+ * On exit the tty device has the line discipline attached and
+ * a reference count of 1. If a pair was created for pty/tty use
+ * and the other was a pty master then it too has a reference count of 1.
+ *
* WSH 06/09/97: Rewritten to remove races and properly clean up after a
* failed open. The new code protects the open with a mutex, so it's
* really quite straightforward. The mutex locking can probably be
* relaxed for the (most common) case of reopening a tty.
*/
+
static int init_dev(struct tty_driver *driver, int idx,
struct tty_struct **ret_tty)
{
struct tty_struct *tty, *o_tty;
struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
- int retval=0;
+ int retval = 0;
/* check whether we're reopening an existing tty */
if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
@@ -1662,10 +2078,20 @@ release_mem_out:
goto end_init;
}
-/*
- * Releases memory associated with a tty structure, and clears out the
- * driver table slots.
+/**
+ * release_mem - release tty structure memory
+ *
+ * Releases memory associated with a tty structure, and clears out the
+ * driver table slots. This function is called when a device is no longer
+ * in use. It also gets called when setup of a device fails.
+ *
+ * Locking:
+ * tty_mutex - sometimes only
+ * takes the file list lock internally when working on the list
+ * of ttys that the driver keeps.
+ * FIXME: should we require tty_mutex is held here ??
*/
+
static void release_mem(struct tty_struct *tty, int idx)
{
struct tty_struct *o_tty;
@@ -2006,18 +2432,27 @@ static void release_dev(struct file * filp)
}
-/*
- * tty_open and tty_release keep up the tty count that contains the
- * number of opens done on a tty. We cannot use the inode-count, as
- * different inodes might point to the same tty.
+/**
+ * tty_open - open a tty device
+ * @inode: inode of device file
+ * @filp: file pointer to tty
+ *
+ * tty_open and tty_release keep up the tty count that contains the
+ * number of opens done on a tty. We cannot use the inode-count, as
+ * different inodes might point to the same tty.
*
- * Open-counting is needed for pty masters, as well as for keeping
- * track of serial lines: DTR is dropped when the last close happens.
- * (This is not done solely through tty->count, now. - Ted 1/27/92)
+ * Open-counting is needed for pty masters, as well as for keeping
+ * track of serial lines: DTR is dropped when the last close happens.
+ * (This is not done solely through tty->count, now. - Ted 1/27/92)
*
- * The termios state of a pty is reset on first open so that
- * settings don't persist across reuse.
+ * The termios state of a pty is reset on first open so that
+ * settings don't persist across reuse.
+ *
+ * Locking: tty_mutex protects current->signal->tty, get_tty_driver and
+ * init_dev work. tty->count should protect the rest.
+ * task_lock is held to update task details for sessions
*/
+
static int tty_open(struct inode * inode, struct file * filp)
{
struct tty_struct *tty;
@@ -2132,6 +2567,18 @@ got_driver:
}
#ifdef CONFIG_UNIX98_PTYS
+/**
+ * ptmx_open - open a unix 98 pty master
+ * @inode: inode of device file
+ * @filp: file pointer to tty
+ *
+ * Allocate a unix98 pty master device from the ptmx driver.
+ *
+ * Locking: tty_mutex protects theinit_dev work. tty->count should
+ protect the rest.
+ * allocated_ptys_lock handles the list of free pty numbers
+ */
+
static int ptmx_open(struct inode * inode, struct file * filp)
{
struct tty_struct *tty;
@@ -2191,6 +2638,18 @@ out:
}
#endif
+/**
+ * tty_release - vfs callback for close
+ * @inode: inode of tty
+ * @filp: file pointer for handle to tty
+ *
+ * Called the last time each file handle is closed that references
+ * this tty. There may however be several such references.
+ *
+ * Locking:
+ * Takes bkl. See release_dev
+ */
+
static int tty_release(struct inode * inode, struct file * filp)
{
lock_kernel();
@@ -2199,7 +2658,18 @@ static int tty_release(struct inode * inode, struct file * filp)
return 0;
}
-/* No kernel lock held - fine */
+/**
+ * tty_poll - check tty status
+ * @filp: file being polled
+ * @wait: poll wait structures to update
+ *
+ * Call the line discipline polling method to obtain the poll
+ * status of the device.
+ *
+ * Locking: locks called line discipline but ldisc poll method
+ * may be re-entered freely by other callers.
+ */
+
static unsigned int tty_poll(struct file * filp, poll_table * wait)
{
struct tty_struct * tty;
@@ -2243,6 +2713,21 @@ static int tty_fasync(int fd, struct file * filp, int on)
return 0;
}
+/**
+ * tiocsti - fake input character
+ * @tty: tty to fake input into
+ * @p: pointer to character
+ *
+ * Fake input to a tty device. Does the neccessary locking and
+ * input management.
+ *
+ * FIXME: does not honour flow control ??
+ *
+ * Locking:
+ * Called functions take tty_ldisc_lock
+ * current->signal->tty check is safe without locks
+ */
+
static int tiocsti(struct tty_struct *tty, char __user *p)
{
char ch, mbz = 0;
@@ -2258,6 +2743,18 @@ static int tiocsti(struct tty_struct *tty, char __user *p)
return 0;
}
+/**
+ * tiocgwinsz - implement window query ioctl
+ * @tty; tty
+ * @arg: user buffer for result
+ *
+ * Copies the kernel idea of the window size into the user buffer. No
+ * locking is done.
+ *
+ * FIXME: Returning random values racing a window size set is wrong
+ * should lock here against that
+ */
+
static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg)
{
if (copy_to_user(arg, &tty->winsize, sizeof(*arg)))
@@ -2265,6 +2762,24 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg)
return 0;
}
+/**
+ * tiocswinsz - implement window size set ioctl
+ * @tty; tty
+ * @arg: user buffer for result
+ *
+ * Copies the user idea of the window size to the kernel. Traditionally
+ * this is just advisory information but for the Linux console it
+ * actually has driver level meaning and triggers a VC resize.
+ *
+ * Locking:
+ * The console_sem is used to ensure we do not try and resize
+ * the console twice at once.
+ * FIXME: Two racing size sets may leave the console and kernel
+ * parameters disagreeing. Is this exploitable ?
+ * FIXME: Random values racing a window size get is wrong
+ * should lock here against that
+ */
+
static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
struct winsize __user * arg)
{
@@ -2294,6 +2809,15 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
return 0;
}
+/**
+ * tioccons - allow admin to move logical console
+ * @file: the file to become console
+ *
+ * Allow the adminstrator to move the redirected console device
+ *
+ * Locking: uses redirect_lock to guard the redirect information
+ */
+
static int tioccons(struct file *file)
{
if (!capable(CAP_SYS_ADMIN))
@@ -2319,6 +2843,17 @@ static int tioccons(struct file *file)
return 0;
}
+/**
+ * fionbio - non blocking ioctl
+ * @file: file to set blocking value
+ * @p: user parameter
+ *
+ * Historical tty interfaces had a blocking control ioctl before
+ * the generic functionality existed. This piece of history is preserved
+ * in the expected tty API of posix OS's.
+ *
+ * Locking: none, the open fle handle ensures it won't go away.
+ */
static int fionbio(struct file *file, int __user *p)
{
@@ -2334,6 +2869,23 @@ static int fionbio(struct file *file, int __user *p)
return 0;
}
+/**
+ * tiocsctty - set controlling tty
+ * @tty: tty structure
+ * @arg: user argument
+ *
+ * This ioctl is used to manage job control. It permits a session
+ * leader to set this tty as the controlling tty for the session.
+ *
+ * Locking:
+ * Takes tasklist lock internally to walk sessions
+ * Takes task_lock() when updating signal->tty
+ *
+ * FIXME: tty_mutex is needed to protect signal->tty references.
+ * FIXME: why task_lock on the signal->tty reference ??
+ *
+ */
+
static int tiocsctty(struct tty_struct *tty, int arg)
{
struct task_struct *p;
@@ -2374,6 +2926,18 @@ static int tiocsctty(struct tty_struct *tty, int arg)
return 0;
}
+/**
+ * tiocgpgrp - get process group
+ * @tty: tty passed by user
+ * @real_tty: tty side of the tty pased by the user if a pty else the tty
+ * @p: returned pid
+ *
+ * Obtain the process group of the tty. If there is no process group
+ * return an error.
+ *
+ * Locking: none. Reference to ->signal->tty is safe.
+ */
+
static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
{
/*
@@ -2385,6 +2949,20 @@ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
return put_user(real_tty->pgrp, p);
}
+/**
+ * tiocspgrp - attempt to set process group
+ * @tty: tty passed by user
+ * @real_tty: tty side device matching tty passed by user
+ * @p: pid pointer
+ *
+ * Set the process group of the tty to the session passed. Only
+ * permitted where the tty session is our session.
+ *
+ * Locking: None
+ *
+ * FIXME: current->signal->tty referencing is unsafe.
+ */
+
static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
{
pid_t pgrp;
@@ -2408,6 +2986,18 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
return 0;
}
+/**
+ * tiocgsid - get session id
+ * @tty: tty passed by user
+ * @real_tty: tty side of the tty pased by the user if a pty else the tty
+ * @p: pointer to returned session id
+ *
+ * Obtain the session id of the tty. If there is no session
+ * return an error.
+ *
+ * Locking: none. Reference to ->signal->tty is safe.
+ */
+
static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
{
/*
@@ -2421,6 +3011,16 @@ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t _
return put_user(real_tty->session, p);
}
+/**
+ * tiocsetd - set line discipline
+ * @tty: tty device
+ * @p: pointer to user data
+ *
+ * Set the line discipline according to user request.
+ *
+ * Locking: see tty_set_ldisc, this function is just a helper
+ */
+
static int tiocsetd(struct tty_struct *tty, int __user *p)
{
int ldisc;
@@ -2430,6 +3030,21 @@ static int tiocsetd(struct tty_struct *tty, int __user *p)
return tty_set_ldisc(tty, ldisc);
}
+/**
+ * send_break - performed time break
+ * @tty: device to break on
+ * @duration: timeout in mS
+ *
+ * Perform a timed break on hardware that lacks its own driver level
+ * timed break functionality.
+ *
+ * Locking:
+ * None
+ *
+ * FIXME:
+ * What if two overlap
+ */
+
static int send_break(struct tty_struct *tty, unsigned int duration)
{
tty->driver->break_ctl(tty, -1);
@@ -2442,8 +3057,19 @@ static int send_break(struct tty_struct *tty, unsigned int duration)
return 0;
}
-static int
-tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p)
+/**
+ * tiocmget - get modem status
+ * @tty: tty device
+ * @file: user file pointer
+ * @p: pointer to result
+ *
+ * Obtain the modem status bits from the tty driver if the feature
+ * is supported. Return -EINVAL if it is not available.
+ *
+ * Locking: none (up to the driver)
+ */
+
+static int tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p)
{
int retval = -EINVAL;
@@ -2456,8 +3082,20 @@ tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p)
return retval;
}
-static int
-tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int cmd,
+/**
+ * tiocmset - set modem status
+ * @tty: tty device
+ * @file: user file pointer
+ * @cmd: command - clear bits, set bits or set all
+ * @p: pointer to desired bits
+ *
+ * Set the modem status bits from the tty driver if the feature
+ * is supported. Return -EINVAL if it is not available.
+ *
+ * Locking: none (up to the driver)
+ */
+
+static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int cmd,
unsigned __user *p)
{
int retval = -EINVAL;
@@ -2573,6 +3211,7 @@ int tty_ioctl(struct inode * inode, struct file * file,
clear_bit(TTY_EXCLUSIVE, &tty->flags);
return 0;
case TIOCNOTTY:
+ /* FIXME: taks lock or tty_mutex ? */
if (current->signal->tty != tty)
return -ENOTTY;
if (current->signal->leader)
@@ -2753,9 +3392,16 @@ void do_SAK(struct tty_struct *tty)
EXPORT_SYMBOL(do_SAK);
-/*
- * This routine is called out of the software interrupt to flush data
- * from the buffer chain to the line discipline.
+/**
+ * flush_to_ldisc
+ * @private_: tty structure passed from work queue.
+ *
+ * This routine is called out of the software interrupt to flush data
+ * from the buffer chain to the line discipline.
+ *
+ * Locking: holds tty->buf.lock to guard buffer list. Drops the lock
+ * while invoking the line discipline receive_buf method. The
+ * receive_buf method is single threaded for each tty instance.
*/
static void flush_to_ldisc(void *private_)
@@ -2831,6 +3477,8 @@ static int n_baud_table = ARRAY_SIZE(baud_table);
* Convert termios baud rate data into a speed. This should be called
* with the termios lock held if this termios is a terminal termios
* structure. May change the termios data.
+ *
+ * Locking: none
*/
int tty_termios_baud_rate(struct termios *termios)
@@ -2859,6 +3507,8 @@ EXPORT_SYMBOL(tty_termios_baud_rate);
* Returns the baud rate as an integer for this terminal. The
* termios lock must be held by the caller and the terminal bit
* flags may be updated.
+ *
+ * Locking: none
*/
int tty_get_baud_rate(struct tty_struct *tty)
@@ -2888,6 +3538,8 @@ EXPORT_SYMBOL(tty_get_baud_rate);
*
* In the event of the queue being busy for flipping the work will be
* held off and retried later.
+ *
+ * Locking: tty buffer lock. Driver locks in low latency mode.
*/
void tty_flip_buffer_push(struct tty_struct *tty)
@@ -2907,9 +3559,16 @@ void tty_flip_buffer_push(struct tty_struct *tty)
EXPORT_SYMBOL(tty_flip_buffer_push);
-/*
- * This subroutine initializes a tty structure.
+/**
+ * initialize_tty_struct
+ * @tty: tty to initialize
+ *
+ * This subroutine initializes a tty structure that has been newly
+ * allocated.
+ *
+ * Locking: none - tty in question must not be exposed at this point
*/
+
static void initialize_tty_struct(struct tty_struct *tty)
{
memset(tty, 0, sizeof(struct tty_struct));
@@ -2935,6 +3594,7 @@ static void initialize_tty_struct(struct tty_struct *tty)
/*
* The default put_char routine if the driver did not define one.
*/
+
static void tty_default_put_char(struct tty_struct *tty, unsigned char ch)
{
tty->driver->write(tty, &ch, 1);
@@ -2943,19 +3603,23 @@ static void tty_default_put_char(struct tty_struct *tty, unsigned char ch)
static struct class *tty_class;
/**
- * tty_register_device - register a tty device
- * @driver: the tty driver that describes the tty device
- * @index: the index in the tty driver for this tty device
- * @device: a struct device that is associated with this tty device.
- * This field is optional, if there is no known struct device for this
- * tty device it can be set to NULL safely.
+ * tty_register_device - register a tty device
+ * @driver: the tty driver that describes the tty device
+ * @index: the index in the tty driver for this tty device
+ * @device: a struct device that is associated with this tty device.
+ * This field is optional, if there is no known struct device
+ * for this tty device it can be set to NULL safely.
*
- * Returns a pointer to the class device (or ERR_PTR(-EFOO) on error).
+ * Returns a pointer to the class device (or ERR_PTR(-EFOO) on error).
*
- * This call is required to be made to register an individual tty device if
- * the tty driver's flags have the TTY_DRIVER_DYNAMIC_DEV bit set. If that
- * bit is not set, this function should not be called by a tty driver.
+ * This call is required to be made to register an individual tty device
+ * if the tty driver's flags have the TTY_DRIVER_DYNAMIC_DEV bit set. If
+ * that bit is not set, this function should not be called by a tty
+ * driver.
+ *
+ * Locking: ??
*/
+
struct class_device *tty_register_device(struct tty_driver *driver,
unsigned index, struct device *device)
{
@@ -2977,13 +3641,16 @@ struct class_device *tty_register_device(struct tty_driver *driver,
}
/**
- * tty_unregister_device - unregister a tty device
- * @driver: the tty driver that describes the tty device
- * @index: the index in the tty driver for this tty device
+ * tty_unregister_device - unregister a tty device
+ * @driver: the tty driver that describes the tty device
+ * @index: the index in the tty driver for this tty device
*
- * If a tty device is registered with a call to tty_register_device() then
- * this function must be made when the tty device is gone.
+ * If a tty device is registered with a call to tty_register_device() then
+ * this function must be called when the tty device is gone.
+ *
+ * Locking: ??
*/
+
void tty_unregister_device(struct tty_driver *driver, unsigned index)
{
class_device_destroy(tty_class, MKDEV(driver->major, driver->minor_start) + index);
@@ -3094,7 +3761,6 @@ int tty_register_driver(struct tty_driver *driver)
driver->cdev.owner = driver->owner;
error = cdev_add(&driver->cdev, dev, driver->num);
if (error) {
- cdev_del(&driver->cdev);
unregister_chrdev_region(dev, driver->num);
driver->ttys = NULL;
driver->termios = driver->termios_locked = NULL;
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index f19cf9d7792..4ad47d321bd 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -36,6 +36,18 @@
#define TERMIOS_WAIT 2
#define TERMIOS_TERMIO 4
+
+/**
+ * tty_wait_until_sent - wait for I/O to finish
+ * @tty: tty we are waiting for
+ * @timeout: how long we will wait
+ *
+ * Wait for characters pending in a tty driver to hit the wire, or
+ * for a timeout to occur (eg due to flow control)
+ *
+ * Locking: none
+ */
+
void tty_wait_until_sent(struct tty_struct * tty, long timeout)
{
DECLARE_WAITQUEUE(wait, current);
@@ -94,6 +106,18 @@ static void unset_locked_termios(struct termios *termios,
old->c_cc[i] : termios->c_cc[i];
}
+/**
+ * change_termios - update termios values
+ * @tty: tty to update
+ * @new_termios: desired new value
+ *
+ * Perform updates to the termios values set on this terminal. There
+ * is a bit of layering violation here with n_tty in terms of the
+ * internal knowledge of this function.
+ *
+ * Locking: termios_sem
+ */
+
static void change_termios(struct tty_struct * tty, struct termios * new_termios)
{
int canon_change;
@@ -155,6 +179,19 @@ static void change_termios(struct tty_struct * tty, struct termios * new_termios
up(&tty->termios_sem);
}
+/**
+ * set_termios - set termios values for a tty
+ * @tty: terminal device
+ * @arg: user data
+ * @opt: option information
+ *
+ * Helper function to prepare termios data and run neccessary other
+ * functions before using change_termios to do the actual changes.
+ *
+ * Locking:
+ * Called functions take ldisc and termios_sem locks
+ */
+
static int set_termios(struct tty_struct * tty, void __user *arg, int opt)
{
struct termios tmp_termios;
@@ -284,6 +321,17 @@ static void set_sgflags(struct termios * termios, int flags)
}
}
+/**
+ * set_sgttyb - set legacy terminal values
+ * @tty: tty structure
+ * @sgttyb: pointer to old style terminal structure
+ *
+ * Updates a terminal from the legacy BSD style terminal information
+ * structure.
+ *
+ * Locking: termios_sem
+ */
+
static int set_sgttyb(struct tty_struct * tty, struct sgttyb __user * sgttyb)
{
int retval;
@@ -369,9 +417,16 @@ static int set_ltchars(struct tty_struct * tty, struct ltchars __user * ltchars)
}
#endif
-/*
- * Send a high priority character to the tty.
+/**
+ * send_prio_char - send priority character
+ *
+ * Send a high priority character to the tty even if stopped
+ *
+ * Locking: none
+ *
+ * FIXME: overlapping calls with start/stop tty lose state of tty
*/
+
static void send_prio_char(struct tty_struct *tty, char ch)
{
int was_stopped = tty->stopped;
diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index eccffaf26fa..a5628a8b662 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -1011,6 +1011,8 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
return -EPERM;
vt_dont_switch = 0;
return 0;
+ case VT_GETHIFONTMASK:
+ return put_user(vc->vc_hi_font_mask, (unsigned short __user *)arg);
default:
return -ENOIOCTLCMD;
}
diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c
index d4bad6704bb..448df277337 100644
--- a/drivers/ieee1394/ohci1394.c
+++ b/drivers/ieee1394/ohci1394.c
@@ -3552,6 +3552,8 @@ static int ohci1394_pci_resume (struct pci_dev *pdev)
static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state)
{
+ pci_save_state(pdev);
+
#ifdef CONFIG_PPC_PMAC
if (machine_is(powermac)) {
struct device_node *of_node;
@@ -3563,8 +3565,6 @@ static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state)
}
#endif
- pci_save_state(pdev);
-
return 0;
}
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index be48cedf986..c54de989eb0 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -255,7 +255,9 @@ static struct region *__rh_alloc(struct region_hash *rh, region_t region)
struct region *reg, *nreg;
read_unlock(&rh->hash_lock);
- nreg = mempool_alloc(rh->region_pool, GFP_NOIO);
+ nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
+ if (unlikely(!nreg))
+ nreg = kmalloc(sizeof(struct region), GFP_NOIO);
nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
RH_CLEAN : RH_NOSYNC;
nreg->rh = rh;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b6d16022a53..8dbab2ef388 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1597,6 +1597,19 @@ void md_update_sb(mddev_t * mddev)
repeat:
spin_lock_irq(&mddev->write_lock);
+
+ if (mddev->degraded && mddev->sb_dirty == 3)
+ /* If the array is degraded, then skipping spares is both
+ * dangerous and fairly pointless.
+ * Dangerous because a device that was removed from the array
+ * might have a event_count that still looks up-to-date,
+ * so it can be re-added without a resync.
+ * Pointless because if there are any spares to skip,
+ * then a recovery will happen and soon that array won't
+ * be degraded any more and the spare can go back to sleep then.
+ */
+ mddev->sb_dirty = 1;
+
sync_req = mddev->in_sync;
mddev->utime = get_seconds();
if (mddev->sb_dirty == 3)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 1efe22a2d04..87bfe9e7d8c 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1625,15 +1625,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return 0;
}
- /* before building a request, check if we can skip these blocks..
- * This call the bitmap_start_sync doesn't actually record anything
- */
if (mddev->bitmap == NULL &&
mddev->recovery_cp == MaxSector &&
+ !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
conf->fullsync == 0) {
*skipped = 1;
return max_sector - sector_nr;
}
+ /* before building a request, check if we can skip these blocks..
+ * This call the bitmap_start_sync doesn't actually record anything
+ */
if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
!conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* We can skip this block, and probably several more */
diff --git a/drivers/mtd/nand/ams-delta.c b/drivers/mtd/nand/ams-delta.c
index d7897dc6b3c..a0ba07c36ee 100644
--- a/drivers/mtd/nand/ams-delta.c
+++ b/drivers/mtd/nand/ams-delta.c
@@ -130,11 +130,13 @@ static void ams_delta_hwcontrol(struct mtd_info *mtd, int cmd,
if (ctrl & NAND_CTRL_CHANGE) {
unsigned long bits;
- bits = (~ctrl & NAND_NCE) << 2;
- bits |= (ctrl & NAND_CLE) << 7;
- bits |= (ctrl & NAND_ALE) << 6;
+ bits = (~ctrl & NAND_NCE) ? AMS_DELTA_LATCH2_NAND_NCE : 0;
+ bits |= (ctrl & NAND_CLE) ? AMS_DELTA_LATCH2_NAND_CLE : 0;
+ bits |= (ctrl & NAND_ALE) ? AMS_DELTA_LATCH2_NAND_ALE : 0;
- ams_delta_latch2_write(0xC2, bits);
+ ams_delta_latch2_write(AMS_DELTA_LATCH2_NAND_CLE |
+ AMS_DELTA_LATCH2_NAND_ALE |
+ AMS_DELTA_LATCH2_NAND_NCE, bits);
}
if (cmd != NAND_CMD_NONE)
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 62b861304e0..c8cbc00243f 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1093,9 +1093,10 @@ static int nand_read(struct mtd_info *mtd, loff_t from, size_t len,
ret = nand_do_read_ops(mtd, from, &chip->ops);
+ *retlen = chip->ops.retlen;
+
nand_release_device(mtd);
- *retlen = chip->ops.retlen;
return ret;
}
@@ -1691,9 +1692,10 @@ static int nand_write(struct mtd_info *mtd, loff_t to, size_t len,
ret = nand_do_write_ops(mtd, to, &chip->ops);
+ *retlen = chip->ops.retlen;
+
nand_release_device(mtd);
- *retlen = chip->ops.retlen;
return ret;
}
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index d6d1bff52b8..2c7de79c83b 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -69,12 +69,12 @@ static void s3c_rtc_setaie(int to)
pr_debug("%s: aie=%d\n", __FUNCTION__, to);
- tmp = readb(S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN;
+ tmp = readb(s3c_rtc_base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN;
if (to)
tmp |= S3C2410_RTCALM_ALMEN;
- writeb(tmp, S3C2410_RTCALM);
+ writeb(tmp, s3c_rtc_base + S3C2410_RTCALM);
}
static void s3c_rtc_setpie(int to)
@@ -84,12 +84,12 @@ static void s3c_rtc_setpie(int to)
pr_debug("%s: pie=%d\n", __FUNCTION__, to);
spin_lock_irq(&s3c_rtc_pie_lock);
- tmp = readb(S3C2410_TICNT) & ~S3C2410_TICNT_ENABLE;
+ tmp = readb(s3c_rtc_base + S3C2410_TICNT) & ~S3C2410_TICNT_ENABLE;
if (to)
tmp |= S3C2410_TICNT_ENABLE;
- writeb(tmp, S3C2410_TICNT);
+ writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
spin_unlock_irq(&s3c_rtc_pie_lock);
}
@@ -98,13 +98,13 @@ static void s3c_rtc_setfreq(int freq)
unsigned int tmp;
spin_lock_irq(&s3c_rtc_pie_lock);
- tmp = readb(S3C2410_TICNT) & S3C2410_TICNT_ENABLE;
+ tmp = readb(s3c_rtc_base + S3C2410_TICNT) & S3C2410_TICNT_ENABLE;
s3c_rtc_freq = freq;
tmp |= (128 / freq)-1;
- writeb(tmp, S3C2410_TICNT);
+ writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
spin_unlock_irq(&s3c_rtc_pie_lock);
}
@@ -113,14 +113,15 @@ static void s3c_rtc_setfreq(int freq)
static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
{
unsigned int have_retried = 0;
+ void __iomem *base = s3c_rtc_base;
retry_get_time:
- rtc_tm->tm_min = readb(S3C2410_RTCMIN);
- rtc_tm->tm_hour = readb(S3C2410_RTCHOUR);
- rtc_tm->tm_mday = readb(S3C2410_RTCDATE);
- rtc_tm->tm_mon = readb(S3C2410_RTCMON);
- rtc_tm->tm_year = readb(S3C2410_RTCYEAR);
- rtc_tm->tm_sec = readb(S3C2410_RTCSEC);
+ rtc_tm->tm_min = readb(base + S3C2410_RTCMIN);
+ rtc_tm->tm_hour = readb(base + S3C2410_RTCHOUR);
+ rtc_tm->tm_mday = readb(base + S3C2410_RTCDATE);
+ rtc_tm->tm_mon = readb(base + S3C2410_RTCMON);
+ rtc_tm->tm_year = readb(base + S3C2410_RTCYEAR);
+ rtc_tm->tm_sec = readb(base + S3C2410_RTCSEC);
/* the only way to work out wether the system was mid-update
* when we read it is to check the second counter, and if it
@@ -151,17 +152,26 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
{
- /* the rtc gets round the y2k problem by just not supporting it */
+ void __iomem *base = s3c_rtc_base;
+ int year = tm->tm_year - 100;
- if (tm->tm_year < 100)
+ pr_debug("set time %02d.%02d.%02d %02d/%02d/%02d\n",
+ tm->tm_year, tm->tm_mon, tm->tm_mday,
+ tm->tm_hour, tm->tm_min, tm->tm_sec);
+
+ /* we get around y2k by simply not supporting it */
+
+ if (year < 0 || year >= 100) {
+ dev_err(dev, "rtc only supports 100 years\n");
return -EINVAL;
+ }
- writeb(BIN2BCD(tm->tm_sec), S3C2410_RTCSEC);
- writeb(BIN2BCD(tm->tm_min), S3C2410_RTCMIN);
- writeb(BIN2BCD(tm->tm_hour), S3C2410_RTCHOUR);
- writeb(BIN2BCD(tm->tm_mday), S3C2410_RTCDATE);
- writeb(BIN2BCD(tm->tm_mon + 1), S3C2410_RTCMON);
- writeb(BIN2BCD(tm->tm_year - 100), S3C2410_RTCYEAR);
+ writeb(BIN2BCD(tm->tm_sec), base + S3C2410_RTCSEC);
+ writeb(BIN2BCD(tm->tm_min), base + S3C2410_RTCMIN);
+ writeb(BIN2BCD(tm->tm_hour), base + S3C2410_RTCHOUR);
+ writeb(BIN2BCD(tm->tm_mday), base + S3C2410_RTCDATE);
+ writeb(BIN2BCD(tm->tm_mon + 1), base + S3C2410_RTCMON);
+ writeb(BIN2BCD(year), base + S3C2410_RTCYEAR);
return 0;
}
@@ -169,16 +179,17 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
{
struct rtc_time *alm_tm = &alrm->time;
+ void __iomem *base = s3c_rtc_base;
unsigned int alm_en;
- alm_tm->tm_sec = readb(S3C2410_ALMSEC);
- alm_tm->tm_min = readb(S3C2410_ALMMIN);
- alm_tm->tm_hour = readb(S3C2410_ALMHOUR);
- alm_tm->tm_mon = readb(S3C2410_ALMMON);
- alm_tm->tm_mday = readb(S3C2410_ALMDATE);
- alm_tm->tm_year = readb(S3C2410_ALMYEAR);
+ alm_tm->tm_sec = readb(base + S3C2410_ALMSEC);
+ alm_tm->tm_min = readb(base + S3C2410_ALMMIN);
+ alm_tm->tm_hour = readb(base + S3C2410_ALMHOUR);
+ alm_tm->tm_mon = readb(base + S3C2410_ALMMON);
+ alm_tm->tm_mday = readb(base + S3C2410_ALMDATE);
+ alm_tm->tm_year = readb(base + S3C2410_ALMYEAR);
- alm_en = readb(S3C2410_RTCALM);
+ alm_en = readb(base + S3C2410_RTCALM);
pr_debug("read alarm %02x %02x.%02x.%02x %02x/%02x/%02x\n",
alm_en,
@@ -226,6 +237,7 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
{
struct rtc_time *tm = &alrm->time;
+ void __iomem *base = s3c_rtc_base;
unsigned int alrm_en;
pr_debug("s3c_rtc_setalarm: %d, %02x/%02x/%02x %02x.%02x.%02x\n",
@@ -234,32 +246,32 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
tm->tm_hour & 0xff, tm->tm_min & 0xff, tm->tm_sec);
- alrm_en = readb(S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN;
- writeb(0x00, S3C2410_RTCALM);
+ alrm_en = readb(base + S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN;
+ writeb(0x00, base + S3C2410_RTCALM);
if (tm->tm_sec < 60 && tm->tm_sec >= 0) {
alrm_en |= S3C2410_RTCALM_SECEN;
- writeb(BIN2BCD(tm->tm_sec), S3C2410_ALMSEC);
+ writeb(BIN2BCD(tm->tm_sec), base + S3C2410_ALMSEC);
}
if (tm->tm_min < 60 && tm->tm_min >= 0) {
alrm_en |= S3C2410_RTCALM_MINEN;
- writeb(BIN2BCD(tm->tm_min), S3C2410_ALMMIN);
+ writeb(BIN2BCD(tm->tm_min), base + S3C2410_ALMMIN);
}
if (tm->tm_hour < 24 && tm->tm_hour >= 0) {
alrm_en |= S3C2410_RTCALM_HOUREN;
- writeb(BIN2BCD(tm->tm_hour), S3C2410_ALMHOUR);
+ writeb(BIN2BCD(tm->tm_hour), base + S3C2410_ALMHOUR);
}
pr_debug("setting S3C2410_RTCALM to %08x\n", alrm_en);
- writeb(alrm_en, S3C2410_RTCALM);
+ writeb(alrm_en, base + S3C2410_RTCALM);
if (0) {
- alrm_en = readb(S3C2410_RTCALM);
+ alrm_en = readb(base + S3C2410_RTCALM);
alrm_en &= ~S3C2410_RTCALM_ALMEN;
- writeb(alrm_en, S3C2410_RTCALM);
+ writeb(alrm_en, base + S3C2410_RTCALM);
disable_irq_wake(s3c_rtc_alarmno);
}
@@ -319,8 +331,8 @@ static int s3c_rtc_ioctl(struct device *dev,
static int s3c_rtc_proc(struct device *dev, struct seq_file *seq)
{
- unsigned int rtcalm = readb(S3C2410_RTCALM);
- unsigned int ticnt = readb (S3C2410_TICNT);
+ unsigned int rtcalm = readb(s3c_rtc_base + S3C2410_RTCALM);
+ unsigned int ticnt = readb(s3c_rtc_base + S3C2410_TICNT);
seq_printf(seq, "alarm_IRQ\t: %s\n",
(rtcalm & S3C2410_RTCALM_ALMEN) ? "yes" : "no" );
@@ -387,39 +399,40 @@ static struct rtc_class_ops s3c_rtcops = {
static void s3c_rtc_enable(struct platform_device *pdev, int en)
{
+ void __iomem *base = s3c_rtc_base;
unsigned int tmp;
if (s3c_rtc_base == NULL)
return;
if (!en) {
- tmp = readb(S3C2410_RTCCON);
- writeb(tmp & ~S3C2410_RTCCON_RTCEN, S3C2410_RTCCON);
+ tmp = readb(base + S3C2410_RTCCON);
+ writeb(tmp & ~S3C2410_RTCCON_RTCEN, base + S3C2410_RTCCON);
- tmp = readb(S3C2410_TICNT);
- writeb(tmp & ~S3C2410_TICNT_ENABLE, S3C2410_TICNT);
+ tmp = readb(base + S3C2410_TICNT);
+ writeb(tmp & ~S3C2410_TICNT_ENABLE, base + S3C2410_TICNT);
} else {
/* re-enable the device, and check it is ok */
- if ((readb(S3C2410_RTCCON) & S3C2410_RTCCON_RTCEN) == 0){
+ if ((readb(base+S3C2410_RTCCON) & S3C2410_RTCCON_RTCEN) == 0){
dev_info(&pdev->dev, "rtc disabled, re-enabling\n");
- tmp = readb(S3C2410_RTCCON);
- writeb(tmp | S3C2410_RTCCON_RTCEN , S3C2410_RTCCON);
+ tmp = readb(base + S3C2410_RTCCON);
+ writeb(tmp|S3C2410_RTCCON_RTCEN, base+S3C2410_RTCCON);
}
- if ((readb(S3C2410_RTCCON) & S3C2410_RTCCON_CNTSEL)){
+ if ((readb(base + S3C2410_RTCCON) & S3C2410_RTCCON_CNTSEL)){
dev_info(&pdev->dev, "removing RTCCON_CNTSEL\n");
- tmp = readb(S3C2410_RTCCON);
- writeb(tmp& ~S3C2410_RTCCON_CNTSEL , S3C2410_RTCCON);
+ tmp = readb(base + S3C2410_RTCCON);
+ writeb(tmp& ~S3C2410_RTCCON_CNTSEL, base+S3C2410_RTCCON);
}
- if ((readb(S3C2410_RTCCON) & S3C2410_RTCCON_CLKRST)){
+ if ((readb(base + S3C2410_RTCCON) & S3C2410_RTCCON_CLKRST)){
dev_info(&pdev->dev, "removing RTCCON_CLKRST\n");
- tmp = readb(S3C2410_RTCCON);
- writeb(tmp & ~S3C2410_RTCCON_CLKRST, S3C2410_RTCCON);
+ tmp = readb(base + S3C2410_RTCCON);
+ writeb(tmp & ~S3C2410_RTCCON_CLKRST, base+S3C2410_RTCCON);
}
}
}
@@ -475,8 +488,8 @@ static int s3c_rtc_probe(struct platform_device *pdev)
}
s3c_rtc_mem = request_mem_region(res->start,
- res->end-res->start+1,
- pdev->name);
+ res->end-res->start+1,
+ pdev->name);
if (s3c_rtc_mem == NULL) {
dev_err(&pdev->dev, "failed to reserve memory region\n");
@@ -495,7 +508,8 @@ static int s3c_rtc_probe(struct platform_device *pdev)
s3c_rtc_enable(pdev, 1);
- pr_debug("s3c2410_rtc: RTCCON=%02x\n", readb(S3C2410_RTCCON));
+ pr_debug("s3c2410_rtc: RTCCON=%02x\n",
+ readb(s3c_rtc_base + S3C2410_RTCCON));
s3c_rtc_setfreq(s3c_rtc_freq);
@@ -543,7 +557,7 @@ static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state)
/* save TICNT for anyone using periodic interrupts */
- ticnt_save = readb(S3C2410_TICNT);
+ ticnt_save = readb(s3c_rtc_base + S3C2410_TICNT);
/* calculate time delta for suspend */
@@ -567,7 +581,7 @@ static int s3c_rtc_resume(struct platform_device *pdev)
rtc_tm_to_time(&tm, &time.tv_sec);
restore_time_delta(&s3c_rtc_delta, &time);
- writeb(ticnt_save, S3C2410_TICNT);
+ writeb(ticnt_save, s3c_rtc_base + S3C2410_TICNT);
return 0;
}
#else
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index f7b5d7372d2..94d1de55607 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -517,7 +517,7 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive)
/* No more interrupts */
if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
printk (KERN_INFO "Packet command completed, %d bytes transferred\n", pc->actually_transferred);
- local_irq_enable();
+ local_irq_enable_in_hardirq();
if (status.b.check)
rq->errors++;
idescsi_end_request (drive, 1, 0);
diff --git a/drivers/video/imacfb.c b/drivers/video/imacfb.c
index b485bece5fc..18ea4a54910 100644
--- a/drivers/video/imacfb.c
+++ b/drivers/video/imacfb.c
@@ -71,10 +71,10 @@ static int set_system(struct dmi_system_id *id)
static struct dmi_system_id __initdata dmi_system_table[] = {
{ set_system, "iMac4,1", {
DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."),
- DMI_MATCH(DMI_BIOS_VERSION,"iMac4,1") }, (void*)M_I17},
+ DMI_MATCH(DMI_PRODUCT_NAME,"iMac4,1") }, (void*)M_I17},
{ set_system, "MacBookPro1,1", {
DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."),
- DMI_MATCH(DMI_BIOS_VERSION,"MacBookPro1,1") }, (void*)M_I17},
+ DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro1,1") }, (void*)M_I17},
{ set_system, "MacBook1,1", {
DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."),
DMI_MATCH(DMI_PRODUCT_NAME,"MacBook1,1")}, (void *)M_MACBOOK},
diff --git a/drivers/video/matrox/g450_pll.c b/drivers/video/matrox/g450_pll.c
index 440272ad10e..7c76e079ca7 100644
--- a/drivers/video/matrox/g450_pll.c
+++ b/drivers/video/matrox/g450_pll.c
@@ -331,7 +331,15 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
tmp |= M1064_XPIXCLKCTRL_PLL_UP;
}
matroxfb_DAC_out(PMINFO M1064_XPIXCLKCTRL, tmp);
+#ifdef __powerpc__
+ /* This is necessary to avoid jitter on PowerPC
+ * (OpenFirmware) systems, but apparently
+ * introduces jitter, at least on a x86-64
+ * using DVI.
+ * A simple workaround is disable for non-PPC.
+ */
matroxfb_DAC_out(PMINFO M1064_XDVICLKCTRL, 0);
+#endif /* __powerpc__ */
matroxfb_DAC_out(PMINFO M1064_XPWRCTRL, xpwrctrl);
matroxfb_DAC_unlock_irqrestore(flags);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 37534573960..045f98854f1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -884,6 +884,61 @@ void bd_set_size(struct block_device *bdev, loff_t size)
}
EXPORT_SYMBOL(bd_set_size);
+static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
+{
+ int ret = 0;
+ struct inode *bd_inode = bdev->bd_inode;
+ struct gendisk *disk = bdev->bd_disk;
+
+ mutex_lock_nested(&bdev->bd_mutex, subclass);
+ lock_kernel();
+ if (!--bdev->bd_openers) {
+ sync_blockdev(bdev);
+ kill_bdev(bdev);
+ }
+ if (bdev->bd_contains == bdev) {
+ if (disk->fops->release)
+ ret = disk->fops->release(bd_inode, NULL);
+ } else {
+ mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+ subclass + 1);
+ bdev->bd_contains->bd_part_count--;
+ mutex_unlock(&bdev->bd_contains->bd_mutex);
+ }
+ if (!bdev->bd_openers) {
+ struct module *owner = disk->fops->owner;
+
+ put_disk(disk);
+ module_put(owner);
+
+ if (bdev->bd_contains != bdev) {
+ kobject_put(&bdev->bd_part->kobj);
+ bdev->bd_part = NULL;
+ }
+ bdev->bd_disk = NULL;
+ bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+ if (bdev != bdev->bd_contains)
+ __blkdev_put(bdev->bd_contains, subclass + 1);
+ bdev->bd_contains = NULL;
+ }
+ unlock_kernel();
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+ return ret;
+}
+
+int blkdev_put(struct block_device *bdev)
+{
+ return __blkdev_put(bdev, BD_MUTEX_NORMAL);
+}
+EXPORT_SYMBOL(blkdev_put);
+
+int blkdev_put_partition(struct block_device *bdev)
+{
+ return __blkdev_put(bdev, BD_MUTEX_PARTITION);
+}
+EXPORT_SYMBOL(blkdev_put_partition);
+
static int
blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
@@ -980,7 +1035,7 @@ out_first:
bdev->bd_disk = NULL;
bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
if (bdev != bdev->bd_contains)
- blkdev_put(bdev->bd_contains);
+ __blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE);
bdev->bd_contains = NULL;
put_disk(disk);
module_put(owner);
@@ -1079,63 +1134,6 @@ static int blkdev_open(struct inode * inode, struct file * filp)
return res;
}
-static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
-{
- int ret = 0;
- struct inode *bd_inode = bdev->bd_inode;
- struct gendisk *disk = bdev->bd_disk;
-
- mutex_lock_nested(&bdev->bd_mutex, subclass);
- lock_kernel();
- if (!--bdev->bd_openers) {
- sync_blockdev(bdev);
- kill_bdev(bdev);
- }
- if (bdev->bd_contains == bdev) {
- if (disk->fops->release)
- ret = disk->fops->release(bd_inode, NULL);
- } else {
- mutex_lock_nested(&bdev->bd_contains->bd_mutex,
- subclass + 1);
- bdev->bd_contains->bd_part_count--;
- mutex_unlock(&bdev->bd_contains->bd_mutex);
- }
- if (!bdev->bd_openers) {
- struct module *owner = disk->fops->owner;
-
- put_disk(disk);
- module_put(owner);
-
- if (bdev->bd_contains != bdev) {
- kobject_put(&bdev->bd_part->kobj);
- bdev->bd_part = NULL;
- }
- bdev->bd_disk = NULL;
- bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
- if (bdev != bdev->bd_contains)
- __blkdev_put(bdev->bd_contains, subclass + 1);
- bdev->bd_contains = NULL;
- }
- unlock_kernel();
- mutex_unlock(&bdev->bd_mutex);
- bdput(bdev);
- return ret;
-}
-
-int blkdev_put(struct block_device *bdev)
-{
- return __blkdev_put(bdev, BD_MUTEX_NORMAL);
-}
-
-EXPORT_SYMBOL(blkdev_put);
-
-int blkdev_put_partition(struct block_device *bdev)
-{
- return __blkdev_put(bdev, BD_MUTEX_PARTITION);
-}
-
-EXPORT_SYMBOL(blkdev_put_partition);
-
static int blkdev_close(struct inode * inode, struct file * filp)
{
struct block_device *bdev = I_BDEV(filp->f_mapping->host);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 19ffb043abb..3a3567433b9 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1168,7 +1168,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
eexit_1:
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n",
- current, ep, epi->file, error));
+ current, ep, epi->ffd.file, error));
return error;
}
@@ -1236,7 +1236,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
struct eventpoll *ep = epi->ep;
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
- current, epi->file, epi, ep));
+ current, epi->ffd.file, epi, ep));
write_lock_irqsave(&ep->lock, flags);
diff --git a/fs/exec.c b/fs/exec.c
index f7aabfeca03..54135df2a96 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -751,7 +751,7 @@ no_thread_group:
write_lock_irq(&tasklist_lock);
spin_lock(&oldsighand->siglock);
- spin_lock(&newsighand->siglock);
+ spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
rcu_assign_pointer(current->sighand, newsighand);
recalc_sigpending();
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f2702cda977..681dea8f953 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -775,7 +775,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if (EXT2_INODE_SIZE(sb) == 0)
goto cantfind_ext2;
sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
- if (sbi->s_inodes_per_block == 0)
+ if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0)
goto cantfind_ext2;
sbi->s_itb_per_group = sbi->s_inodes_per_group /
sbi->s_inodes_per_block;
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index a504a40d6d2..063d994bda0 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1269,12 +1269,12 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
goal = le32_to_cpu(es->s_first_data_block);
group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
EXT3_BLOCKS_PER_GROUP(sb);
+ goal_group = group_no;
+retry_alloc:
gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
if (!gdp)
goto io_error;
- goal_group = group_no;
-retry:
free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
/*
* if there is not enough free blocks to make a new resevation
@@ -1349,7 +1349,7 @@ retry:
if (my_rsv) {
my_rsv = NULL;
group_no = goal_group;
- goto retry;
+ goto retry_alloc;
}
/* No space left on the device */
*errp = -ENOSPC;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 0971814c38b..42da6078431 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -261,7 +261,7 @@ void journal_commit_transaction(journal_t *journal)
struct buffer_head *bh = jh2bh(jh);
jbd_lock_bh_state(bh);
- kfree(jh->b_committed_data);
+ jbd_slab_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
jbd_unlock_bh_state(bh);
}
@@ -745,14 +745,14 @@ restart_loop:
* Otherwise, we can just throw away the frozen data now.
*/
if (jh->b_committed_data) {
- kfree(jh->b_committed_data);
+ jbd_slab_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
if (jh->b_frozen_data) {
jh->b_committed_data = jh->b_frozen_data;
jh->b_frozen_data = NULL;
}
} else if (jh->b_frozen_data) {
- kfree(jh->b_frozen_data);
+ jbd_slab_free(jh->b_frozen_data, bh->b_size);
jh->b_frozen_data = NULL;
}
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 8c9b28dff11..f66724ce443 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -84,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit);
static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
static void __journal_abort_soft (journal_t *journal, int errno);
+static int journal_create_jbd_slab(size_t slab_size);
/*
* Helper function used to manage commit timeouts
@@ -328,10 +329,10 @@ repeat:
char *tmp;
jbd_unlock_bh_state(bh_in);
- tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS);
+ tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
jbd_lock_bh_state(bh_in);
if (jh_in->b_frozen_data) {
- kfree(tmp);
+ jbd_slab_free(tmp, bh_in->b_size);
goto repeat;
}
@@ -1069,17 +1070,17 @@ static int load_superblock(journal_t *journal)
int journal_load(journal_t *journal)
{
int err;
+ journal_superblock_t *sb;
err = load_superblock(journal);
if (err)
return err;
+ sb = journal->j_superblock;
/* If this is a V2 superblock, then we have to check the
* features flags on it. */
if (journal->j_format_version >= 2) {
- journal_superblock_t *sb = journal->j_superblock;
-
if ((sb->s_feature_ro_compat &
~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
(sb->s_feature_incompat &
@@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal)
}
}
+ /*
+ * Create a slab for this blocksize
+ */
+ err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize));
+ if (err)
+ return err;
+
/* Let the recovery code check whether it needs to recover any
* data from the journal. */
if (journal_recover(journal))
@@ -1612,6 +1620,77 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
}
/*
+ * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
+ * and allocate frozen and commit buffers from these slabs.
+ *
+ * Reason for doing this is to avoid, SLAB_DEBUG - since it could
+ * cause bh to cross page boundary.
+ */
+
+#define JBD_MAX_SLABS 5
+#define JBD_SLAB_INDEX(size) (size >> 11)
+
+static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static const char *jbd_slab_names[JBD_MAX_SLABS] = {
+ "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
+};
+
+static void journal_destroy_jbd_slabs(void)
+{
+ int i;
+
+ for (i = 0; i < JBD_MAX_SLABS; i++) {
+ if (jbd_slab[i])
+ kmem_cache_destroy(jbd_slab[i]);
+ jbd_slab[i] = NULL;
+ }
+}
+
+static int journal_create_jbd_slab(size_t slab_size)
+{
+ int i = JBD_SLAB_INDEX(slab_size);
+
+ BUG_ON(i >= JBD_MAX_SLABS);
+
+ /*
+ * Check if we already have a slab created for this size
+ */
+ if (jbd_slab[i])
+ return 0;
+
+ /*
+ * Create a slab and force alignment to be same as slabsize -
+ * this will make sure that allocations won't cross the page
+ * boundary.
+ */
+ jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
+ slab_size, slab_size, 0, NULL, NULL);
+ if (!jbd_slab[i]) {
+ printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void * jbd_slab_alloc(size_t size, gfp_t flags)
+{
+ int idx;
+
+ idx = JBD_SLAB_INDEX(size);
+ BUG_ON(jbd_slab[idx] == NULL);
+ return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
+}
+
+void jbd_slab_free(void *ptr, size_t size)
+{
+ int idx;
+
+ idx = JBD_SLAB_INDEX(size);
+ BUG_ON(jbd_slab[idx] == NULL);
+ kmem_cache_free(jbd_slab[idx], ptr);
+}
+
+/*
* Journal_head storage management
*/
static kmem_cache_t *journal_head_cache;
@@ -1799,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
printk(KERN_WARNING "%s: freeing "
"b_frozen_data\n",
__FUNCTION__);
- kfree(jh->b_frozen_data);
+ jbd_slab_free(jh->b_frozen_data, bh->b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing "
"b_committed_data\n",
__FUNCTION__);
- kfree(jh->b_committed_data);
+ jbd_slab_free(jh->b_committed_data, bh->b_size);
}
bh->b_private = NULL;
jh->b_bh = NULL; /* debug, really */
@@ -1961,6 +2040,7 @@ static void journal_destroy_caches(void)
journal_destroy_revoke_caches();
journal_destroy_journal_head_cache();
journal_destroy_handle_cache();
+ journal_destroy_jbd_slabs();
}
static int __init journal_init(void)
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 508b2ea91f4..de2e4cbbf79 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -666,8 +666,9 @@ repeat:
if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh);
- frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
- GFP_NOFS);
+ frozen_buffer =
+ jbd_slab_alloc(jh2bh(jh)->b_size,
+ GFP_NOFS);
if (!frozen_buffer) {
printk(KERN_EMERG
"%s: OOM for frozen_buffer\n",
@@ -879,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
repeat:
if (!jh->b_committed_data) {
- committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS);
+ committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
if (!committed_data) {
printk(KERN_EMERG "%s: No memory for committed data\n",
__FUNCTION__);
@@ -906,7 +907,7 @@ repeat:
out:
journal_put_journal_head(jh);
if (unlikely(committed_data))
- kfree(committed_data);
+ jbd_slab_free(committed_data, bh->b_size);
return err;
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 9ea91c5eeb7..330ff9fc7cf 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -204,6 +204,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
/*
* Allocate the buffer map to keep the superblock small.
*/
+ if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+ goto out_illegal_sb;
i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
map = kmalloc(i, GFP_KERNEL);
if (!map)
@@ -263,7 +265,7 @@ out_no_root:
out_no_bitmap:
printk("MINIX-fs: bad superblock or unable to read bitmaps\n");
- out_freemap:
+out_freemap:
for (i = 0; i < sbi->s_imap_blocks; i++)
brelse(sbi->s_imap[i]);
for (i = 0; i < sbi->s_zmap_blocks; i++)
@@ -276,11 +278,16 @@ out_no_map:
printk("MINIX-fs: can't allocate map\n");
goto out_release;
+out_illegal_sb:
+ if (!silent)
+ printk("MINIX-fs: bad superblock\n");
+ goto out_release;
+
out_no_fs:
if (!silent)
printk("VFS: Can't find a Minix or Minix V2 filesystem "
"on device %s\n", s->s_id);
- out_release:
+out_release:
brelse(bh);
goto out;
@@ -290,7 +297,7 @@ out_bad_hblock:
out_bad_sb:
printk("MINIX-fs: unable to read superblock\n");
- out:
+out:
s->s_fs_info = NULL;
kfree(sbi);
return -EINVAL;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 9f2cfc30f9c..94215622544 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -169,7 +169,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
"Mapped: %8lu kB\n"
"Slab: %8lu kB\n"
"PageTables: %8lu kB\n"
- "NFS Unstable: %8lu kB\n"
+ "NFS_Unstable: %8lu kB\n"
"Bounce: %8lu kB\n"
"CommitLimit: %8lu kB\n"
"Committed_AS: %8lu kB\n"
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 39fedaa88a0..d935fb9394e 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -424,7 +424,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf)
int res = -ENOTDIR;
if (!file->f_op || !file->f_op->readdir)
goto out;
- mutex_lock(&inode->i_mutex);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR);
// down(&inode->i_zombie);
res = -ENOENT;
if (!IS_DEADDIR(inode)) {
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e7c8615beb6..30c6e8a9446 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -169,18 +169,20 @@ static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh)
static struct buffer_head *
ufs_clear_frags(struct inode *inode, sector_t beg,
- unsigned int n)
+ unsigned int n, sector_t want)
{
- struct buffer_head *res, *bh;
+ struct buffer_head *res = NULL, *bh;
sector_t end = beg + n;
- res = sb_getblk(inode->i_sb, beg);
- ufs_clear_frag(inode, res);
- for (++beg; beg < end; ++beg) {
+ for (; beg < end; ++beg) {
bh = sb_getblk(inode->i_sb, beg);
ufs_clear_frag(inode, bh);
- brelse(bh);
+ if (want != beg)
+ brelse(bh);
+ else
+ res = bh;
}
+ BUG_ON(!res);
return res;
}
@@ -265,7 +267,9 @@ repeat:
lastfrag = ufsi->i_lastfrag;
}
- goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb;
+ tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]);
+ if (tmp)
+ goal = tmp + uspi->s_fpb;
tmp = ufs_new_fragments (inode, p, fragment - blockoff,
goal, required + blockoff,
err, locked_page);
@@ -277,13 +281,15 @@ repeat:
tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff),
fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff),
err, locked_page);
- }
+ } else /* (lastblock > block) */ {
/*
* We will allocate new block before last allocated block
*/
- else /* (lastblock > block) */ {
- if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1])))
- goal = tmp + uspi->s_fpb;
+ if (block) {
+ tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[block-1]);
+ if (tmp)
+ goal = tmp + uspi->s_fpb;
+ }
tmp = ufs_new_fragments(inode, p, fragment - blockoff,
goal, uspi->s_fpb, err, locked_page);
}
@@ -296,7 +302,7 @@ repeat:
}
if (!phys) {
- result = ufs_clear_frags(inode, tmp + blockoff, required);
+ result = ufs_clear_frags(inode, tmp, required, tmp + blockoff);
} else {
*phys = tmp + blockoff;
result = NULL;
@@ -383,7 +389,7 @@ repeat:
}
}
- if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]) + uspi->s_fpb))
+ if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1])))
goal = tmp + uspi->s_fpb;
else
goal = bh->b_blocknr + uspi->s_fpb;
@@ -397,7 +403,8 @@ repeat:
if (!phys) {
- result = ufs_clear_frags(inode, tmp + blockoff, uspi->s_fpb);
+ result = ufs_clear_frags(inode, tmp, uspi->s_fpb,
+ tmp + blockoff);
} else {
*phys = tmp + blockoff;
*new = 1;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index c9b55872079..ea11d04c41a 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -375,17 +375,15 @@ static int ufs_alloc_lastblock(struct inode *inode)
int err = 0;
struct address_space *mapping = inode->i_mapping;
struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
- struct ufs_inode_info *ufsi = UFS_I(inode);
unsigned lastfrag, i, end;
struct page *lastpage;
struct buffer_head *bh;
lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift;
- if (!lastfrag) {
- ufsi->i_lastfrag = 0;
+ if (!lastfrag)
goto out;
- }
+
lastfrag--;
lastpage = ufs_get_locked_page(mapping, lastfrag >>
@@ -400,25 +398,25 @@ static int ufs_alloc_lastblock(struct inode *inode)
for (i = 0; i < end; ++i)
bh = bh->b_this_page;
- if (!buffer_mapped(bh)) {
- err = ufs_getfrag_block(inode, lastfrag, bh, 1);
-
- if (unlikely(err))
- goto out_unlock;
-
- if (buffer_new(bh)) {
- clear_buffer_new(bh);
- unmap_underlying_metadata(bh->b_bdev,
- bh->b_blocknr);
- /*
- * we do not zeroize fragment, because of
- * if it maped to hole, it already contains zeroes
- */
- set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
- set_page_dirty(lastpage);
- }
+
+ err = ufs_getfrag_block(inode, lastfrag, bh, 1);
+
+ if (unlikely(err))
+ goto out_unlock;
+
+ if (buffer_new(bh)) {
+ clear_buffer_new(bh);
+ unmap_underlying_metadata(bh->b_bdev,
+ bh->b_blocknr);
+ /*
+ * we do not zeroize fragment, because of
+ * if it maped to hole, it already contains zeroes
+ */
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ set_page_dirty(lastpage);
}
+
out_unlock:
ufs_put_locked_page(lastpage);
out:
@@ -440,23 +438,11 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return -EPERM;
- if (inode->i_size > old_i_size) {
- /*
- * if we expand file we should care about
- * allocation of block for last byte first of all
- */
- err = ufs_alloc_lastblock(inode);
+ err = ufs_alloc_lastblock(inode);
- if (err) {
- i_size_write(inode, old_i_size);
- goto out;
- }
- /*
- * go away, because of we expand file, and we do not
- * need free blocks, and zeroizes page
- */
- lock_kernel();
- goto almost_end;
+ if (err) {
+ i_size_write(inode, old_i_size);
+ goto out;
}
block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
@@ -477,21 +463,8 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
yield();
}
- if (inode->i_size < old_i_size) {
- /*
- * now we should have enough space
- * to allocate block for last byte
- */
- err = ufs_alloc_lastblock(inode);
- if (err)
- /*
- * looks like all the same - we have no space,
- * but we truncate file already
- */
- inode->i_size = (ufsi->i_lastfrag - 1) * uspi->s_fsize;
- }
-almost_end:
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+ ufsi->i_lastfrag = DIRECT_FRAGMENT;
unlock_kernel();
mark_inode_dirty(inode);
out:
diff --git a/include/asm-arm/arch-s3c2410/regs-rtc.h b/include/asm-arm/arch-s3c2410/regs-rtc.h
index 228983f89bc..0fbec07bb6b 100644
--- a/include/asm-arm/arch-s3c2410/regs-rtc.h
+++ b/include/asm-arm/arch-s3c2410/regs-rtc.h
@@ -18,7 +18,7 @@
#ifndef __ASM_ARCH_REGS_RTC_H
#define __ASM_ARCH_REGS_RTC_H __FILE__
-#define S3C2410_RTCREG(x) ((x) + S3C24XX_VA_RTC)
+#define S3C2410_RTCREG(x) (x)
#define S3C2410_RTCCON S3C2410_RTCREG(0x40)
#define S3C2410_RTCCON_RTCEN (1<<0)
diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h
index e33e9f9e4c6..22cb07cc8f3 100644
--- a/include/asm-i386/mmzone.h
+++ b/include/asm-i386/mmzone.h
@@ -14,7 +14,7 @@ extern struct pglist_data *node_data[];
#ifdef CONFIG_X86_NUMAQ
#include <asm/numaq.h>
-#else /* summit or generic arch */
+#elif defined(CONFIG_ACPI_SRAT)/* summit or generic arch */
#include <asm/srat.h>
#endif
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index 269d000bb2a..bea0255196c 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -216,6 +216,7 @@ COMPATIBLE_IOCTL(VT_RESIZE)
COMPATIBLE_IOCTL(VT_RESIZEX)
COMPATIBLE_IOCTL(VT_LOCKSWITCH)
COMPATIBLE_IOCTL(VT_UNLOCKSWITCH)
+COMPATIBLE_IOCTL(VT_GETHIFONTMASK)
/* Little p (/dev/rtc, /dev/envctrl, etc.) */
COMPATIBLE_IOCTL(RTC_AIE_ON)
COMPATIBLE_IOCTL(RTC_AIE_OFF)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 25610205c90..555bc195c42 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -570,13 +570,14 @@ struct inode {
* 3: quota file
*
* The locking order between these classes is
- * parent -> child -> normal -> quota
+ * parent -> child -> normal -> xattr -> quota
*/
enum inode_i_mutex_lock_class
{
I_MUTEX_NORMAL,
I_MUTEX_PARENT,
I_MUTEX_CHILD,
+ I_MUTEX_XATTR,
I_MUTEX_QUOTA
};
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 20eb34403d0..a04c154c520 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -72,6 +72,9 @@ extern int journal_enable_debug;
#endif
extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
+extern void * jbd_slab_alloc(size_t size, gfp_t flags);
+extern void jbd_slab_free(void *ptr, size_t size);
+
#define jbd_kmalloc(size, flags) \
__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
#define jbd_rep_kmalloc(size, flags) \
diff --git a/include/linux/node.h b/include/linux/node.h
index 81dcec84cd8..bc001bc225c 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -30,12 +30,20 @@ extern struct node node_devices[];
extern int register_node(struct node *, int, struct node *);
extern void unregister_node(struct node *node);
+#ifdef CONFIG_NUMA
extern int register_one_node(int nid);
extern void unregister_one_node(int nid);
-#ifdef CONFIG_NUMA
extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
#else
+static inline int register_one_node(int nid)
+{
+ return 0;
+}
+static inline int unregister_one_node(int nid)
+{
+ return 0;
+}
static inline int register_cpu_under_node(unsigned int cpu, unsigned int nid)
{
return 0;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index e421d5e3481..04827ca6578 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -59,6 +59,7 @@ struct tty_bufhead {
struct tty_buffer *head; /* Queue head */
struct tty_buffer *tail; /* Active buffer */
struct tty_buffer *free; /* Free queue head */
+ int memory_used; /* Buffer space used excluding free queue */
};
/*
* The pty uses char_buf and flag_buf as a contiguous buffer
diff --git a/include/linux/vt.h b/include/linux/vt.h
index 8ab334a4822..ba806e8711b 100644
--- a/include/linux/vt.h
+++ b/include/linux/vt.h
@@ -60,5 +60,6 @@ struct vt_consize {
#define VT_RESIZEX 0x560A /* set kernel's idea of screensize + more */
#define VT_LOCKSWITCH 0x560B /* disallow vt switching */
#define VT_UNLOCKSWITCH 0x560C /* allow vt switching */
+#define VT_GETHIFONTMASK 0x560D /* return hi font mask */
#endif /* _LINUX_VT_H */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1a649f2bb9b..4ea6f0dc2fc 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -816,6 +816,10 @@ static int update_cpumask(struct cpuset *cs, char *buf)
struct cpuset trialcs;
int retval, cpus_unchanged;
+ /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
+ if (cs == &top_cpuset)
+ return -EACCES;
+
trialcs = *cs;
retval = cpulist_parse(buf, trialcs.cpus_allowed);
if (retval < 0)
@@ -2033,6 +2037,33 @@ out:
return err;
}
+/*
+ * The top_cpuset tracks what CPUs and Memory Nodes are online,
+ * period. This is necessary in order to make cpusets transparent
+ * (of no affect) on systems that are actively using CPU hotplug
+ * but making no active use of cpusets.
+ *
+ * This handles CPU hotplug (cpuhp) events. If someday Memory
+ * Nodes can be hotplugged (dynamically changing node_online_map)
+ * then we should handle that too, perhaps in a similar way.
+ */
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int cpuset_handle_cpuhp(struct notifier_block *nb,
+ unsigned long phase, void *cpu)
+{
+ mutex_lock(&manage_mutex);
+ mutex_lock(&callback_mutex);
+
+ top_cpuset.cpus_allowed = cpu_online_map;
+
+ mutex_unlock(&callback_mutex);
+ mutex_unlock(&manage_mutex);
+
+ return 0;
+}
+#endif
+
/**
* cpuset_init_smp - initialize cpus_allowed
*
@@ -2043,6 +2074,8 @@ void __init cpuset_init_smp(void)
{
top_cpuset.cpus_allowed = cpu_online_map;
top_cpuset.mems_allowed = node_online_map;
+
+ hotcpu_notifier(cpuset_handle_cpuhp, 0);
}
/**
@@ -2387,7 +2420,7 @@ EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
int cpuset_excl_nodes_overlap(const struct task_struct *p)
{
const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */
- int overlap = 0; /* do cpusets overlap? */
+ int overlap = 1; /* do cpusets overlap? */
task_lock(current);
if (current->flags & PF_EXITING) {
diff --git a/kernel/futex.c b/kernel/futex.c
index d4633c588f3..b9b8aea5389 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -397,7 +397,7 @@ static struct task_struct * futex_find_get_task(pid_t pid)
p = NULL;
goto out_unlock;
}
- if (p->state == EXIT_ZOMBIE || p->exit_state == EXIT_ZOMBIE) {
+ if (p->exit_state != 0) {
p = NULL;
goto out_unlock;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index a2be2d05529..a234fbee123 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4162,10 +4162,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
read_unlock_irq(&tasklist_lock);
return -ESRCH;
}
- get_task_struct(p);
- read_unlock_irq(&tasklist_lock);
retval = sched_setscheduler(p, policy, &lparam);
- put_task_struct(p);
+ read_unlock_irq(&tasklist_lock);
return retval;
}
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index dcfb5d73146..51cacd111db 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -111,7 +111,6 @@ static int stop_machine(void)
/* If some failed, kill them all. */
if (ret < 0) {
stopmachine_set_state(STOPMACHINE_EXIT);
- up(&stopmachine_mutex);
return ret;
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e70d6c6d6fe..f1f5ec78378 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -442,11 +442,12 @@ int swap_type_of(dev_t device)
if (!(swap_info[i].flags & SWP_WRITEOK))
continue;
+
if (!device) {
spin_unlock(&swap_lock);
return i;
}
- inode = swap_info->swap_file->f_dentry->d_inode;
+ inode = swap_info[i].swap_file->f_dentry->d_inode;
if (S_ISBLK(inode->i_mode) &&
device == MKDEV(imajor(inode), iminor(inode))) {
spin_unlock(&swap_lock);
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 0f85970ee6d..090bc39e819 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -342,6 +342,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk,
new_packet->dccphtx_ccval =
DCCP_SKB_CB(skb)->dccpd_ccval =
hctx->ccid3hctx_last_win_count;
+ timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+ hctx->ccid3hctx_t_ipi);
}
out:
return rc;
@@ -413,7 +415,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
if (len > 0) {
- hctx->ccid3hctx_t_nom = now;
+ timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
+ hctx->ccid3hctx_t_ipi);
ccid3_calc_new_t_ipi(hctx);
ccid3_calc_new_delta(hctx);
timeval_add_usecs(&hctx->ccid3hctx_t_nom,
@@ -757,8 +760,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
}
hcrx->ccid3hcrx_tstamp_last_feedback = now;
- hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval;
- hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno;
+ hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval;
hcrx->ccid3hcrx_bytes_recv = 0;
/* Convert to multiples of 10us */
@@ -782,7 +784,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
return 0;
- DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
+ DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
if (dccp_packet_without_ack(skb))
return 0;
@@ -854,6 +856,11 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
interval = 1;
}
found:
+ if (!tail) {
+ LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n",
+ __FUNCTION__);
+ return ~0;
+ }
rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval;
ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
dccp_role(sk), sk, rtt);
@@ -864,9 +871,20 @@ found:
delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta);
+ if (x_recv == 0)
+ x_recv = hcrx->ccid3hcrx_x_recv;
+
tmp1 = (u64)x_recv * (u64)rtt;
do_div(tmp1,10000000);
tmp2 = (u32)tmp1;
+
+ if (!tmp2) {
+ LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 "
+ "%s: x_recv = %u, rtt =%u\n",
+ __FUNCTION__, x_recv, rtt);
+ return ~0;
+ }
+
fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
/* do not alter order above or you will get overflow on 32 bit */
p = tfrc_calc_x_reverse_lookup(fval);
@@ -882,31 +900,101 @@ found:
static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+ struct dccp_li_hist_entry *next, *head;
+ u64 seq_temp;
- if (seq_loss != DCCP_MAX_SEQNO + 1 &&
- list_empty(&hcrx->ccid3hcrx_li_hist)) {
- struct dccp_li_hist_entry *li_tail;
+ if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
+ if (!dccp_li_hist_interval_new(ccid3_li_hist,
+ &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
+ return;
- li_tail = dccp_li_hist_interval_new(ccid3_li_hist,
- &hcrx->ccid3hcrx_li_hist,
- seq_loss, win_loss);
- if (li_tail == NULL)
+ next = (struct dccp_li_hist_entry *)
+ hcrx->ccid3hcrx_li_hist.next;
+ next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+ } else {
+ struct dccp_li_hist_entry *entry;
+ struct list_head *tail;
+
+ head = (struct dccp_li_hist_entry *)
+ hcrx->ccid3hcrx_li_hist.next;
+ /* FIXME win count check removed as was wrong */
+ /* should make this check with receive history */
+ /* and compare there as per section 10.2 of RFC4342 */
+
+ /* new loss event detected */
+ /* calculate last interval length */
+ seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
+ entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC);
+
+ if (entry == NULL) {
+ printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__);
+ dump_stack();
return;
- li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
- } else
- LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of "
- "interval\n", __FUNCTION__);
+ }
+
+ list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist);
+
+ tail = hcrx->ccid3hcrx_li_hist.prev;
+ list_del(tail);
+ kmem_cache_free(ccid3_li_hist->dccplih_slab, tail);
+
+ /* Create the newest interval */
+ entry->dccplih_seqno = seq_loss;
+ entry->dccplih_interval = seq_temp;
+ entry->dccplih_win_count = win_loss;
+ }
}
-static void ccid3_hc_rx_detect_loss(struct sock *sk)
+static int ccid3_hc_rx_detect_loss(struct sock *sk,
+ struct dccp_rx_hist_entry *packet)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
- u8 win_loss;
- const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist,
- &hcrx->ccid3hcrx_li_hist,
- &win_loss);
+ struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
+ u64 seqno = packet->dccphrx_seqno;
+ u64 tmp_seqno;
+ int loss = 0;
+ u8 ccval;
+
+
+ tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
+
+ if (!rx_hist ||
+ follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
+ hcrx->ccid3hcrx_seqno_nonloss = seqno;
+ hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
+ goto detect_out;
+ }
+
- ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
+ while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
+ > TFRC_RECV_NUM_LATE_LOSS) {
+ loss = 1;
+ ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
+ hcrx->ccid3hcrx_ccval_nonloss);
+ tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
+ dccp_inc_seqno(&tmp_seqno);
+ hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
+ dccp_inc_seqno(&tmp_seqno);
+ while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
+ tmp_seqno, &ccval)) {
+ hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
+ hcrx->ccid3hcrx_ccval_nonloss = ccval;
+ dccp_inc_seqno(&tmp_seqno);
+ }
+ }
+
+ /* FIXME - this code could be simplified with above while */
+ /* but works at moment */
+ if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
+ hcrx->ccid3hcrx_seqno_nonloss = seqno;
+ hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
+ }
+
+detect_out:
+ dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
+ &hcrx->ccid3hcrx_li_hist, packet,
+ hcrx->ccid3hcrx_seqno_nonloss);
+ return loss;
}
static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -916,8 +1004,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
struct dccp_rx_hist_entry *packet;
struct timeval now;
u8 win_count;
- u32 p_prev, r_sample, t_elapsed;
- int ins;
+ u32 p_prev, rtt_prev, r_sample, t_elapsed;
+ int loss;
BUG_ON(hcrx == NULL ||
!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
@@ -932,7 +1020,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
case DCCP_PKT_DATAACK:
if (opt_recv->dccpor_timestamp_echo == 0)
break;
- p_prev = hcrx->ccid3hcrx_rtt;
+ rtt_prev = hcrx->ccid3hcrx_rtt;
dccp_timestamp(sk, &now);
timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
r_sample = timeval_usecs(&now);
@@ -951,8 +1039,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
r_sample / 10;
- if (p_prev != hcrx->ccid3hcrx_rtt)
- ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
+ if (rtt_prev != hcrx->ccid3hcrx_rtt)
+ ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n",
dccp_role(sk), hcrx->ccid3hcrx_rtt,
opt_recv->dccpor_elapsed_time);
break;
@@ -973,8 +1061,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
win_count = packet->dccphrx_ccval;
- ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
- &hcrx->ccid3hcrx_li_hist, packet);
+ loss = ccid3_hc_rx_detect_loss(sk, packet);
if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
return;
@@ -991,7 +1078,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
case TFRC_RSTATE_DATA:
hcrx->ccid3hcrx_bytes_recv += skb->len -
dccp_hdr(skb)->dccph_doff * 4;
- if (ins != 0)
+ if (loss)
break;
dccp_timestamp(sk, &now);
@@ -1012,7 +1099,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
dccp_role(sk), sk, dccp_state_name(sk->sk_state));
- ccid3_hc_rx_detect_loss(sk);
p_prev = hcrx->ccid3hcrx_p;
/* Calculate loss event rate */
@@ -1022,6 +1108,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
/* Scaling up by 1000000 as fixed decimal */
if (i_mean != 0)
hcrx->ccid3hcrx_p = 1000000 / i_mean;
+ } else {
+ printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__);
+ dump_stack();
}
if (hcrx->ccid3hcrx_p > p_prev) {
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 22cb9f80a09..0a2cb7536d2 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -120,9 +120,10 @@ struct ccid3_hc_rx_sock {
#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
- u64 ccid3hcrx_seqno_last_counter:48,
+ u64 ccid3hcrx_seqno_nonloss:48,
+ ccid3hcrx_ccval_nonloss:4,
ccid3hcrx_state:8,
- ccid3hcrx_last_counter:4;
+ ccid3hcrx_ccval_last_counter:4;
u32 ccid3hcrx_bytes_recv;
struct timeval ccid3hcrx_tstamp_last_feedback;
struct timeval ccid3hcrx_tstamp_last_ack;
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index b93d9fc98cb..906c81ab9d4 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -12,6 +12,7 @@
*/
#include <linux/module.h>
+#include <net/sock.h>
#include "loss_interval.h"
@@ -90,13 +91,13 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
u32 w_tot = 0;
list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
- if (i < DCCP_LI_HIST_IVAL_F_LENGTH) {
+ if (li_entry->dccplih_interval != ~0) {
i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
w_tot += dccp_li_hist_w[i];
+ if (i != 0)
+ i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
}
- if (i != 0)
- i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
break;
@@ -107,37 +108,36 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
i_tot = max(i_tot0, i_tot1);
- /* FIXME: Why do we do this? -Ian McDonald */
- if (i_tot * 4 < w_tot)
- i_tot = w_tot * 4;
+ if (!w_tot) {
+ LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__);
+ return 1;
+ }
- return i_tot * 4 / w_tot;
+ return i_tot / w_tot;
}
EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
-struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist,
- struct list_head *list,
- const u64 seq_loss,
- const u8 win_loss)
+int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+ struct list_head *list, const u64 seq_loss, const u8 win_loss)
{
- struct dccp_li_hist_entry *tail = NULL, *entry;
+ struct dccp_li_hist_entry *entry;
int i;
- for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) {
+ for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) {
entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC);
if (entry == NULL) {
dccp_li_hist_purge(hist, list);
- return NULL;
+ dump_stack();
+ return 0;
}
- if (tail == NULL)
- tail = entry;
+ entry->dccplih_interval = ~0;
list_add(&entry->dccplih_node, list);
}
entry->dccplih_seqno = seq_loss;
entry->dccplih_win_count = win_loss;
- return tail;
+ return 1;
}
EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index dcb370a53f5..0ae85f0340b 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -52,9 +52,6 @@ extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
-extern struct dccp_li_hist_entry *
- dccp_li_hist_interval_new(struct dccp_li_hist *hist,
- struct list_head *list,
- const u64 seq_loss,
- const u8 win_loss);
+extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+ struct list_head *list, const u64 seq_loss, const u8 win_loss);
#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 420c60f8604..b876c9c81c6 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -112,64 +112,27 @@ struct dccp_rx_hist_entry *
EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
-int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
struct list_head *rx_list,
struct list_head *li_list,
- struct dccp_rx_hist_entry *packet)
+ struct dccp_rx_hist_entry *packet,
+ u64 nonloss_seqno)
{
- struct dccp_rx_hist_entry *entry, *next, *iter;
+ struct dccp_rx_hist_entry *entry, *next;
u8 num_later = 0;
- iter = dccp_rx_hist_head(rx_list);
- if (iter == NULL)
- dccp_rx_hist_add_entry(rx_list, packet);
- else {
- const u64 seqno = packet->dccphrx_seqno;
-
- if (after48(seqno, iter->dccphrx_seqno))
- dccp_rx_hist_add_entry(rx_list, packet);
- else {
- if (dccp_rx_hist_entry_data_packet(iter))
- num_later = 1;
-
- list_for_each_entry_continue(iter, rx_list,
- dccphrx_node) {
- if (after48(seqno, iter->dccphrx_seqno)) {
- dccp_rx_hist_add_entry(&iter->dccphrx_node,
- packet);
- goto trim_history;
- }
-
- if (dccp_rx_hist_entry_data_packet(iter))
- num_later++;
-
- if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
- dccp_rx_hist_entry_delete(hist, packet);
- return 1;
- }
- }
-
- if (num_later < TFRC_RECV_NUM_LATE_LOSS)
- dccp_rx_hist_add_entry(rx_list, packet);
- /*
- * FIXME: else what? should we destroy the packet
- * like above?
- */
- }
- }
+ list_add(&packet->dccphrx_node, rx_list);
-trim_history:
- /*
- * Trim history (remove all packets after the NUM_LATE_LOSS + 1
- * data packets)
- */
num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
if (!list_empty(li_list)) {
list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
if (num_later == 0) {
- list_del_init(&entry->dccphrx_node);
- dccp_rx_hist_entry_delete(hist, entry);
+ if (after48(nonloss_seqno,
+ entry->dccphrx_seqno)) {
+ list_del_init(&entry->dccphrx_node);
+ dccp_rx_hist_entry_delete(hist, entry);
+ }
} else if (dccp_rx_hist_entry_data_packet(entry))
--num_later;
}
@@ -217,94 +180,10 @@ trim_history:
--num_later;
}
}
-
- return 0;
}
EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
-u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
- struct list_head *li_list, u8 *win_loss)
-{
- struct dccp_rx_hist_entry *entry, *next, *packet;
- struct dccp_rx_hist_entry *a_loss = NULL;
- struct dccp_rx_hist_entry *b_loss = NULL;
- u64 seq_loss = DCCP_MAX_SEQNO + 1;
- u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
-
- list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
- if (num_later == 0) {
- b_loss = entry;
- break;
- } else if (dccp_rx_hist_entry_data_packet(entry))
- --num_later;
- }
-
- if (b_loss == NULL)
- goto out;
-
- num_later = 1;
- list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
- if (num_later == 0) {
- a_loss = entry;
- break;
- } else if (dccp_rx_hist_entry_data_packet(entry))
- --num_later;
- }
-
- if (a_loss == NULL) {
- if (list_empty(li_list)) {
- /* no loss event have occured yet */
- LIMIT_NETDEBUG("%s: TODO: find a lost data packet by "
- "comparing to initial seqno\n",
- __FUNCTION__);
- goto out;
- } else {
- LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!",
- __FUNCTION__);
- goto out;
- }
- }
-
- /* Locate a lost data packet */
- entry = packet = b_loss;
- list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
- u64 delta = dccp_delta_seqno(entry->dccphrx_seqno,
- packet->dccphrx_seqno);
-
- if (delta != 0) {
- if (dccp_rx_hist_entry_data_packet(packet))
- --delta;
- /*
- * FIXME: check this, probably this % usage is because
- * in earlier drafts the ndp count was just 8 bits
- * long, but now it cam be up to 24 bits long.
- */
-#if 0
- if (delta % DCCP_NDP_LIMIT !=
- (packet->dccphrx_ndp -
- entry->dccphrx_ndp) % DCCP_NDP_LIMIT)
-#endif
- if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) {
- seq_loss = entry->dccphrx_seqno;
- dccp_inc_seqno(&seq_loss);
- }
- }
- packet = entry;
- if (packet == a_loss)
- break;
- }
-out:
- if (seq_loss != DCCP_MAX_SEQNO + 1)
- *win_loss = a_loss->dccphrx_ccval;
- else
- *win_loss = 0; /* Paranoia */
-
- return seq_loss;
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss);
-
struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
{
struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index aea9c5d7091..067cf1c85a3 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -166,12 +166,6 @@ static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
struct list_head *list);
-static inline void dccp_rx_hist_add_entry(struct list_head *list,
- struct dccp_rx_hist_entry *entry)
-{
- list_add(&entry->dccphrx_node, list);
-}
-
static inline struct dccp_rx_hist_entry *
dccp_rx_hist_head(struct list_head *list)
{
@@ -190,10 +184,11 @@ static inline int
entry->dccphrx_type == DCCP_PKT_DATAACK;
}
-extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
struct list_head *rx_list,
struct list_head *li_list,
- struct dccp_rx_hist_entry *packet);
+ struct dccp_rx_hist_entry *packet,
+ u64 nonloss_seqno);
extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
struct list_head *li_list, u8 *win_loss);