diff options
Diffstat (limited to 'include/linux')
28 files changed, 1392 insertions, 211 deletions
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 6e915878e88..d960889e92e 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -144,6 +144,9 @@ struct blk_user_trace_setup { #ifdef __KERNEL__ #if defined(CONFIG_BLK_DEV_IO_TRACE) + +#include <linux/sysfs.h> + struct blk_trace { int trace_state; struct rchan *rchan; @@ -194,6 +197,8 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern struct attribute_group blk_trace_attr_group; + #else /* !CONFIG_BLK_DEV_IO_TRACE */ #define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) #define blk_trace_shutdown(q) do { } while (0) diff --git a/include/linux/compat.h b/include/linux/compat.h index 9723edd6455..f2ded21f9a3 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -193,10 +193,10 @@ asmlinkage ssize_t compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen); asmlinkage ssize_t compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, u32 pos_high, u32 pos_low); + unsigned long vlen, u32 pos_low, u32 pos_high); asmlinkage ssize_t compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, u32 pos_high, u32 pos_low); + unsigned long vlen, u32 pos_low, u32 pos_high); int compat_do_execve(char * filename, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs * regs); diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d95da1020f1..6faa7e549de 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -68,6 +68,7 @@ struct ftrace_branch_data { unsigned long miss; unsigned long hit; }; + unsigned long miss_hit[2]; }; }; @@ -125,10 +126,7 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); .line = __LINE__, \ }; \ ______r = !!(cond); \ - if (______r) \ - ______f.hit++; \ - else \ - ______f.miss++; \ + ______f.miss_hit[______r]++; \ ______r; \ })) #endif /* CONFIG_PROFILE_ALL_BRANCHES */ diff --git a/include/linux/connector.h b/include/linux/connector.h index fc65d219d88..b9966e64604 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -39,8 +39,10 @@ #define CN_IDX_V86D 0x4 #define CN_VAL_V86D_UVESAFB 0x1 #define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ +#define CN_DST_IDX 0x6 +#define CN_DST_VAL 0x1 -#define CN_NETLINK_USERS 6 +#define CN_NETLINK_USERS 7 /* * Maximum connector's message size. diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index af0e01d4c66..eb5c2ba2f81 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -71,6 +71,9 @@ struct dentry *debugfs_create_bool(const char *name, mode_t mode, struct dentry *debugfs_create_blob(const char *name, mode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob); + +bool debugfs_initialized(void); + #else #include <linux/err.h> @@ -183,6 +186,11 @@ static inline struct dentry *debugfs_create_blob(const char *name, mode_t mode, return ERR_PTR(-ENODEV); } +static inline bool debugfs_initialized(void) +{ + return false; +} + #endif #endif diff --git a/include/linux/dst.h b/include/linux/dst.h new file mode 100644 index 00000000000..e26fed84b1a --- /dev/null +++ b/include/linux/dst.h @@ -0,0 +1,587 @@ +/* + * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __DST_H +#define __DST_H + +#include <linux/types.h> +#include <linux/connector.h> + +#define DST_NAMELEN 32 +#define DST_NAME "dst" + +enum { + /* Remove node with given id from storage */ + DST_DEL_NODE = 0, + /* Add remote node with given id to the storage */ + DST_ADD_REMOTE, + /* Add local node with given id to the storage to be exported and used by remote peers */ + DST_ADD_EXPORT, + /* Crypto initialization command (hash/cipher used to protect the connection) */ + DST_CRYPTO, + /* Security attributes for given connection (permissions for example) */ + DST_SECURITY, + /* Register given node in the block layer subsystem */ + DST_START, + DST_CMD_MAX +}; + +struct dst_ctl +{ + /* Storage name */ + char name[DST_NAMELEN]; + /* Command flags */ + __u32 flags; + /* Command itself (see above) */ + __u32 cmd; + /* Maximum number of pages per single request in this device */ + __u32 max_pages; + /* Stale/error transaction scanning timeout in milliseconds */ + __u32 trans_scan_timeout; + /* Maximum number of retry sends before completing transaction as broken */ + __u32 trans_max_retries; + /* Storage size */ + __u64 size; +}; + +/* Reply command carries completion status */ +struct dst_ctl_ack +{ + struct cn_msg msg; + int error; + int unused[3]; +}; + +/* + * Unfortunaltely socket address structure is not exported to userspace + * and is redefined there. + */ +#define SADDR_MAX_DATA 128 + +struct saddr { + /* address family, AF_xxx */ + unsigned short sa_family; + /* 14 bytes of protocol address */ + char sa_data[SADDR_MAX_DATA]; + /* Number of bytes used in sa_data */ + unsigned short sa_data_len; +}; + +/* Address structure */ +struct dst_network_ctl +{ + /* Socket type: datagram, stream...*/ + unsigned int type; + /* Let me guess, is it a Jupiter diameter? */ + unsigned int proto; + /* Peer's address */ + struct saddr addr; +}; + +struct dst_crypto_ctl +{ + /* Cipher and hash names */ + char cipher_algo[DST_NAMELEN]; + char hash_algo[DST_NAMELEN]; + + /* Key sizes. Can be zero for digest for example */ + unsigned int cipher_keysize, hash_keysize; + /* Alignment. Calculated by the DST itself. */ + unsigned int crypto_attached_size; + /* Number of threads to perform crypto operations */ + int thread_num; +}; + +/* Export security attributes have this bits checked in when client connects */ +#define DST_PERM_READ (1<<0) +#define DST_PERM_WRITE (1<<1) + +/* + * Right now it is simple model, where each remote address + * is assigned to set of permissions it is allowed to perform. + * In real world block device does not know anything but + * reading and writing, so it should be more than enough. + */ +struct dst_secure_user +{ + unsigned int permissions; + struct saddr addr; +}; + +/* + * Export control command: device to export and network address to accept + * clients to work with given device + */ +struct dst_export_ctl +{ + char device[DST_NAMELEN]; + struct dst_network_ctl ctl; +}; + +enum { + DST_CFG = 1, /* Request remote configuration */ + DST_IO, /* IO command */ + DST_IO_RESPONSE, /* IO response */ + DST_PING, /* Keepalive message */ + DST_NCMD_MAX, +}; + +struct dst_cmd +{ + /* Network command itself, see above */ + __u32 cmd; + /* + * Size of the attached data + * (in most cases, for READ command it means how many bytes were requested) + */ + __u32 size; + /* Crypto size: number of attached bytes with digest/hmac */ + __u32 csize; + /* Here we can carry secret data */ + __u32 reserved; + /* Read/write bits, see how they are encoded in bio structure */ + __u64 rw; + /* BIO flags */ + __u64 flags; + /* Unique command id (like transaction ID) */ + __u64 id; + /* Sector to start IO from */ + __u64 sector; + /* Hash data is placed after this header */ + __u8 hash[0]; +}; + +/* + * Convert command to/from network byte order. + * We do not use hton*() functions, since there is + * no 64-bit implementation. + */ +static inline void dst_convert_cmd(struct dst_cmd *c) +{ + c->cmd = __cpu_to_be32(c->cmd); + c->csize = __cpu_to_be32(c->csize); + c->size = __cpu_to_be32(c->size); + c->sector = __cpu_to_be64(c->sector); + c->id = __cpu_to_be64(c->id); + c->flags = __cpu_to_be64(c->flags); + c->rw = __cpu_to_be64(c->rw); +} + +/* Transaction id */ +typedef __u64 dst_gen_t; + +#ifdef __KERNEL__ + +#include <linux/blkdev.h> +#include <linux/bio.h> +#include <linux/device.h> +#include <linux/mempool.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <linux/rbtree.h> + +#ifdef CONFIG_DST_DEBUG +#define dprintk(f, a...) printk(KERN_NOTICE f, ##a) +#else +static inline void __attribute__ ((format (printf, 1, 2))) + dprintk(const char *fmt, ...) {} +#endif + +struct dst_node; + +struct dst_trans +{ + /* DST node we are working with */ + struct dst_node *n; + + /* Entry inside transaction tree */ + struct rb_node trans_entry; + + /* Merlin kills this transaction when this memory cell equals zero */ + atomic_t refcnt; + + /* How this transaction should be processed by crypto engine */ + short enc; + /* How many times this transaction was resent */ + short retries; + /* Completion status */ + int error; + + /* When did we send it to the remote peer */ + long send_time; + + /* My name is... + * Well, computers does not speak, they have unique id instead */ + dst_gen_t gen; + + /* Block IO we are working with */ + struct bio *bio; + + /* Network command for above block IO request */ + struct dst_cmd cmd; +}; + +struct dst_crypto_engine +{ + /* What should we do with all block requests */ + struct crypto_hash *hash; + struct crypto_ablkcipher *cipher; + + /* Pool of pages used to encrypt data into before sending */ + int page_num; + struct page **pages; + + /* What to do with current request */ + int enc; + /* Who we are and where do we go */ + struct scatterlist *src, *dst; + + /* Maximum timeout waiting for encryption to be completed */ + long timeout; + /* IV is a 64-bit sequential counter */ + u64 iv; + + /* Secret data */ + void *private; + + /* Cached temporary data lives here */ + int size; + void *data; +}; + +struct dst_state +{ + /* The main state protection */ + struct mutex state_lock; + + /* Polling machinery for sockets */ + wait_queue_t wait; + wait_queue_head_t *whead; + /* Most of events are being waited here */ + wait_queue_head_t thread_wait; + + /* Who owns this? */ + struct dst_node *node; + + /* Network address for this state */ + struct dst_network_ctl ctl; + + /* Permissions to work with: read-only or rw connection */ + u32 permissions; + + /* Called when we need to clean private data */ + void (* cleanup)(struct dst_state *st); + + /* Used by the server: BIO completion queues BIOs here */ + struct list_head request_list; + spinlock_t request_lock; + + /* Guess what? No, it is not number of planets */ + atomic_t refcnt; + + /* This flags is set when connection should be dropped */ + int need_exit; + + /* + * Socket to work with. Second pointer is used for + * lockless check if socket was changed before performing + * next action (like working with cached polling result) + */ + struct socket *socket, *read_socket; + + /* Cached preallocated data */ + void *data; + unsigned int size; + + /* Currently processed command */ + struct dst_cmd cmd; +}; + +struct dst_info +{ + /* Device size */ + u64 size; + + /* Local device name for export devices */ + char local[DST_NAMELEN]; + + /* Network setup */ + struct dst_network_ctl net; + + /* Sysfs bits use this */ + struct device device; +}; + +struct dst_node +{ + struct list_head node_entry; + + /* Hi, my name is stored here */ + char name[DST_NAMELEN]; + /* My cache name is stored here */ + char cache_name[DST_NAMELEN]; + + /* Block device attached to given node. + * Only valid for exporting nodes */ + struct block_device *bdev; + /* Network state machine for given peer */ + struct dst_state *state; + + /* Block IO machinery */ + struct request_queue *queue; + struct gendisk *disk; + + /* Number of threads in processing pool */ + int thread_num; + /* Maximum number of pages in single IO */ + int max_pages; + + /* I'm that big in bytes */ + loff_t size; + + /* Exported to userspace node information */ + struct dst_info *info; + + /* + * Security attribute list. + * Used only by exporting node currently. + */ + struct list_head security_list; + struct mutex security_lock; + + /* + * When this unerflows below zero, university collapses. + * But this will not happen, since node will be freed, + * when reference counter reaches zero. + */ + atomic_t refcnt; + + /* How precisely should I be started? */ + int (*start)(struct dst_node *); + + /* Crypto capabilities */ + struct dst_crypto_ctl crypto; + u8 *hash_key; + u8 *cipher_key; + + /* Pool of processing thread */ + struct thread_pool *pool; + + /* Transaction IDs live here */ + atomic_long_t gen; + + /* + * How frequently and how many times transaction + * tree should be scanned to drop stale objects. + */ + long trans_scan_timeout; + int trans_max_retries; + + /* Small gnomes live here */ + struct rb_root trans_root; + struct mutex trans_lock; + + /* + * Transaction cache/memory pool. + * It is big enough to contain not only transaction + * itself, but additional crypto data (digest/hmac). + */ + struct kmem_cache *trans_cache; + mempool_t *trans_pool; + + /* This entity scans transaction tree */ + struct delayed_work trans_work; + + wait_queue_head_t wait; +}; + +/* Kernel representation of the security attribute */ +struct dst_secure +{ + struct list_head sec_entry; + struct dst_secure_user sec; +}; + +int dst_process_bio(struct dst_node *n, struct bio *bio); + +int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r); +int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le); + +static inline struct dst_state *dst_state_get(struct dst_state *st) +{ + BUG_ON(atomic_read(&st->refcnt) == 0); + atomic_inc(&st->refcnt); + return st; +} + +void dst_state_put(struct dst_state *st); + +struct dst_state *dst_state_alloc(struct dst_node *n); +int dst_state_socket_create(struct dst_state *st); +void dst_state_socket_release(struct dst_state *st); + +void dst_state_exit_connected(struct dst_state *st); + +int dst_state_schedule_receiver(struct dst_state *st); + +void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str); + +static inline void dst_state_lock(struct dst_state *st) +{ + mutex_lock(&st->state_lock); +} + +static inline void dst_state_unlock(struct dst_state *st) +{ + mutex_unlock(&st->state_lock); +} + +void dst_poll_exit(struct dst_state *st); +int dst_poll_init(struct dst_state *st); + +static inline unsigned int dst_state_poll(struct dst_state *st) +{ + unsigned int revents = POLLHUP | POLLERR; + + dst_state_lock(st); + if (st->socket) + revents = st->socket->ops->poll(NULL, st->socket, NULL); + dst_state_unlock(st); + + return revents; +} + +static inline int dst_thread_setup(void *private, void *data) +{ + return 0; +} + +void dst_node_put(struct dst_node *n); + +static inline struct dst_node *dst_node_get(struct dst_node *n) +{ + atomic_inc(&n->refcnt); + return n; +} + +int dst_data_recv(struct dst_state *st, void *data, unsigned int size); +int dst_recv_cdata(struct dst_state *st, void *cdata); +int dst_data_send_header(struct socket *sock, + void *data, unsigned int size, int more); + +int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio); + +int dst_process_io(struct dst_state *st); +int dst_export_crypto(struct dst_node *n, struct bio *bio); +int dst_export_send_bio(struct bio *bio); +int dst_start_export(struct dst_node *n); + +int __init dst_export_init(void); +void dst_export_exit(void); + +/* Private structure for export block IO requests */ +struct dst_export_priv +{ + struct list_head request_entry; + struct dst_state *state; + struct bio *bio; + struct dst_cmd cmd; +}; + +static inline void dst_trans_get(struct dst_trans *t) +{ + atomic_inc(&t->refcnt); +} + +struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen); +int dst_trans_remove(struct dst_trans *t); +int dst_trans_remove_nolock(struct dst_trans *t); +void dst_trans_put(struct dst_trans *t); + +/* + * Convert bio into network command. + */ +static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd, + u32 command, u64 id) +{ + cmd->cmd = command; + cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS; + cmd->rw = bio->bi_rw; + cmd->size = bio->bi_size; + cmd->csize = 0; + cmd->id = id; + cmd->sector = bio->bi_sector; +}; + +int dst_trans_send(struct dst_trans *t); +int dst_trans_crypto(struct dst_trans *t); + +int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl); +void dst_node_crypto_exit(struct dst_node *n); + +static inline int dst_need_crypto(struct dst_node *n) +{ + struct dst_crypto_ctl *c = &n->crypto; + /* + * Logical OR is appropriate here, but boolean one produces + * more optimal code, so it is used instead. + */ + return (c->hash_algo[0] | c->cipher_algo[0]); +} + +int dst_node_trans_init(struct dst_node *n, unsigned int size); +void dst_node_trans_exit(struct dst_node *n); + +/* + * Pool of threads. + * Ready list contains threads currently free to be used, + * active one contains threads with some work scheduled for them. + * Caller can wait in given queue when thread is ready. + */ +struct thread_pool +{ + int thread_num; + struct mutex thread_lock; + struct list_head ready_list, active_list; + + wait_queue_head_t wait; +}; + +void thread_pool_del_worker(struct thread_pool *p); +void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id); +int thread_pool_add_worker(struct thread_pool *p, + char *name, + unsigned int id, + void *(* init)(void *data), + void (* cleanup)(void *data), + void *data); + +void thread_pool_destroy(struct thread_pool *p); +struct thread_pool *thread_pool_create(int num, char *name, + void *(* init)(void *data), + void (* cleanup)(void *data), + void *data); + +int thread_pool_schedule(struct thread_pool *p, + int (* setup)(void *stored_private, void *setup_data), + int (* action)(void *stored_private, void *setup_data), + void *setup_data, long timeout); +int thread_pool_schedule_private(struct thread_pool *p, + int (* setup)(void *private, void *data), + int (* action)(void *private, void *data), + void *data, long timeout, void *id); + +#endif /* __KERNEL__ */ +#endif /* __DST_H */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a7f8134c594..015a3d22cf7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1,15 +1,18 @@ #ifndef _LINUX_FTRACE_H #define _LINUX_FTRACE_H -#include <linux/linkage.h> -#include <linux/fs.h> -#include <linux/ktime.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/module.h> +#include <linux/trace_clock.h> #include <linux/kallsyms.h> +#include <linux/linkage.h> #include <linux/bitops.h> +#include <linux/module.h> +#include <linux/ktime.h> #include <linux/sched.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/fs.h> + +#include <asm/ftrace.h> #ifdef CONFIG_FUNCTION_TRACER @@ -95,9 +98,41 @@ stack_trace_sysctl(struct ctl_table *table, int write, loff_t *ppos); #endif +struct ftrace_func_command { + struct list_head list; + char *name; + int (*func)(char *func, char *cmd, + char *params, int enable); +}; + #ifdef CONFIG_DYNAMIC_FTRACE -/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ -#include <asm/ftrace.h> + +int ftrace_arch_code_modify_prepare(void); +int ftrace_arch_code_modify_post_process(void); + +struct seq_file; + +struct ftrace_probe_ops { + void (*func)(unsigned long ip, + unsigned long parent_ip, + void **data); + int (*callback)(unsigned long ip, void **data); + void (*free)(void **data); + int (*print)(struct seq_file *m, + unsigned long ip, + struct ftrace_probe_ops *ops, + void *data); +}; + +extern int +register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, + void *data); +extern void +unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, + void *data); +extern void +unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops); +extern void unregister_ftrace_function_probe_all(char *glob); enum { FTRACE_FL_FREE = (1 << 0), @@ -110,15 +145,23 @@ enum { }; struct dyn_ftrace { - struct list_head list; - unsigned long ip; /* address of mcount call-site */ - unsigned long flags; - struct dyn_arch_ftrace arch; + union { + unsigned long ip; /* address of mcount call-site */ + struct dyn_ftrace *freelist; + }; + union { + unsigned long flags; + struct dyn_ftrace *newlist; + }; + struct dyn_arch_ftrace arch; }; int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); +int register_ftrace_command(struct ftrace_func_command *cmd); +int unregister_ftrace_command(struct ftrace_func_command *cmd); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern int ftrace_dyn_arch_init(void *data); @@ -126,6 +169,10 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); + +#ifndef FTRACE_ADDR +#define FTRACE_ADDR ((unsigned long)ftrace_caller) +#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern void ftrace_graph_caller(void); extern int ftrace_enable_ftrace_graph_caller(void); @@ -136,7 +183,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } #endif /** - * ftrace_make_nop - convert code into top + * ftrace_make_nop - convert code into nop * @mod: module structure if called by module load initialization * @rec: the mcount call site record * @addr: the address that the call site should be calling @@ -181,7 +228,6 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); - /* May be defined in arch */ extern int ftrace_arch_read_dyn_info(char *buf, int size); @@ -198,6 +244,14 @@ extern void ftrace_enable_daemon(void); # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) static inline void ftrace_release(void *start, unsigned long size) { } +static inline int register_ftrace_command(struct ftrace_func_command *cmd) +{ + return -EINVAL; +} +static inline int unregister_ftrace_command(char *cmd_name) +{ + return -EINVAL; +} #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ @@ -233,24 +287,25 @@ static inline void __ftrace_enabled_restore(int enabled) #endif } -#ifdef CONFIG_FRAME_POINTER -/* TODO: need to fix this for ARM */ -# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) -# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) -# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) -# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) -# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) -# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) -#else -# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -# define CALLER_ADDR1 0UL -# define CALLER_ADDR2 0UL -# define CALLER_ADDR3 0UL -# define CALLER_ADDR4 0UL -# define CALLER_ADDR5 0UL -# define CALLER_ADDR6 0UL -#endif +#ifndef HAVE_ARCH_CALLER_ADDR +# ifdef CONFIG_FRAME_POINTER +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) +# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) +# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) +# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) +# else +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 0UL +# define CALLER_ADDR2 0UL +# define CALLER_ADDR3 0UL +# define CALLER_ADDR4 0UL +# define CALLER_ADDR5 0UL +# define CALLER_ADDR6 0UL +# endif +#endif /* ifndef HAVE_ARCH_CALLER_ADDR */ #ifdef CONFIG_IRQSOFF_TRACER extern void time_hardirqs_on(unsigned long a0, unsigned long a1); @@ -268,54 +323,6 @@ static inline void __ftrace_enabled_restore(int enabled) # define trace_preempt_off(a0, a1) do { } while (0) #endif -#ifdef CONFIG_TRACING -extern int ftrace_dump_on_oops; - -extern void tracing_start(void); -extern void tracing_stop(void); -extern void ftrace_off_permanent(void); - -extern void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); - -/** - * ftrace_printk - printf formatting in the ftrace buffer - * @fmt: the printf format for printing - * - * Note: __ftrace_printk is an internal function for ftrace_printk and - * the @ip is passed in via the ftrace_printk macro. - * - * This function allows a kernel developer to debug fast path sections - * that printk is not appropriate for. By scattering in various - * printk like tracing in the code, a developer can quickly see - * where problems are occurring. - * - * This is intended as a debugging tool for the developer only. - * Please refrain from leaving ftrace_printks scattered around in - * your code. - */ -# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt) -extern int -__ftrace_printk(unsigned long ip, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void ftrace_dump(void); -#else -static inline void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } -static inline int -ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); - -static inline void tracing_start(void) { } -static inline void tracing_stop(void) { } -static inline void ftrace_off_permanent(void) { } -static inline int -ftrace_printk(const char *fmt, ...) -{ - return 0; -} -static inline void ftrace_dump(void) { } -#endif - #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); extern void ftrace_init_module(struct module *mod, @@ -327,36 +334,6 @@ ftrace_init_module(struct module *mod, unsigned long *start, unsigned long *end) { } #endif -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -struct power_trace { -#ifdef CONFIG_POWER_TRACER - ktime_t stamp; - ktime_t end; - int type; - int state; -#endif -}; - -#ifdef CONFIG_POWER_TRACER -extern void trace_power_start(struct power_trace *it, unsigned int type, - unsigned int state); -extern void trace_power_mark(struct power_trace *it, unsigned int type, - unsigned int state); -extern void trace_power_end(struct power_trace *it); -#else -static inline void trace_power_start(struct power_trace *it, unsigned int type, - unsigned int state) { } -static inline void trace_power_mark(struct power_trace *it, unsigned int type, - unsigned int state) { } -static inline void trace_power_end(struct power_trace *it) { } -#endif - - /* * Structure that defines an entry function trace. */ @@ -398,8 +375,7 @@ struct ftrace_ret_stack { extern void return_to_handler(void); extern int -ftrace_push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func, int *depth); +ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); extern void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); @@ -514,6 +490,50 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) return tsk->trace & TSK_TRACE_FL_GRAPH; } +extern int ftrace_dump_on_oops; + #endif /* CONFIG_TRACING */ + +#ifdef CONFIG_HW_BRANCH_TRACER + +void trace_hw_branch(u64 from, u64 to); +void trace_hw_branch_oops(void); + +#else /* CONFIG_HW_BRANCH_TRACER */ + +static inline void trace_hw_branch(u64 from, u64 to) {} +static inline void trace_hw_branch_oops(void) {} + +#endif /* CONFIG_HW_BRANCH_TRACER */ + +/* + * A syscall entry in the ftrace syscalls array. + * + * @name: name of the syscall + * @nb_args: number of parameters it takes + * @types: list of types as strings + * @args: list of args as strings (args[i] matches types[i]) + */ +struct syscall_metadata { + const char *name; + int nb_args; + const char **types; + const char **args; +}; + +#ifdef CONFIG_FTRACE_SYSCALLS +extern void arch_init_ftrace_syscalls(void); +extern struct syscall_metadata *syscall_nr_to_meta(int nr); +extern void start_ftrace_syscalls(void); +extern void stop_ftrace_syscalls(void); +extern void ftrace_syscall_enter(struct pt_regs *regs); +extern void ftrace_syscall_exit(struct pt_regs *regs); +#else +static inline void start_ftrace_syscalls(void) { } +static inline void stop_ftrace_syscalls(void) { } +static inline void ftrace_syscall_enter(struct pt_regs *regs) { } +static inline void ftrace_syscall_exit(struct pt_regs *regs) { } +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index 366a054d0b0..dca7bf8cffe 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -2,7 +2,7 @@ #define _LINUX_FTRACE_IRQ_H -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) +#ifdef CONFIG_FTRACE_NMI_ENTER extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/include/linux/gfp.h b/include/linux/gfp.h index dd20cd78faa..0bbc15f5453 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -4,6 +4,7 @@ #include <linux/mmzone.h> #include <linux/stddef.h> #include <linux/linkage.h> +#include <linux/topology.h> struct vm_area_struct; diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index f83288347dd..faa1cf848bc 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -15,55 +15,61 @@ * - bits 0-7 are the preemption count (max preemption depth: 256) * - bits 8-15 are the softirq count (max # of softirqs: 256) * - * The hardirq count can be overridden per architecture, the default is: + * The hardirq count can in theory reach the same as NR_IRQS. + * In reality, the number of nested IRQS is limited to the stack + * size as well. For archs with over 1000 IRQS it is not practical + * to expect that they will all nest. We give a max of 10 bits for + * hardirq nesting. An arch may choose to give less than 10 bits. + * m68k expects it to be 8. * - * - bits 16-27 are the hardirq count (max # of hardirqs: 4096) - * - ( bit 28 is the PREEMPT_ACTIVE flag. ) + * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024) + * - bit 26 is the NMI_MASK + * - bit 28 is the PREEMPT_ACTIVE flag * * PREEMPT_MASK: 0x000000ff * SOFTIRQ_MASK: 0x0000ff00 - * HARDIRQ_MASK: 0x0fff0000 + * HARDIRQ_MASK: 0x03ff0000 + * NMI_MASK: 0x04000000 */ #define PREEMPT_BITS 8 #define SOFTIRQ_BITS 8 +#define NMI_BITS 1 -#ifndef HARDIRQ_BITS -#define HARDIRQ_BITS 12 +#define MAX_HARDIRQ_BITS 10 -#ifndef MAX_HARDIRQS_PER_CPU -#define MAX_HARDIRQS_PER_CPU NR_IRQS +#ifndef HARDIRQ_BITS +# define HARDIRQ_BITS MAX_HARDIRQ_BITS #endif -/* - * The hardirq mask has to be large enough to have space for potentially - * all IRQ sources in the system nesting on a single CPU. - */ -#if (1 << HARDIRQ_BITS) < MAX_HARDIRQS_PER_CPU -# error HARDIRQ_BITS is too low! -#endif +#if HARDIRQ_BITS > MAX_HARDIRQ_BITS +#error HARDIRQ_BITS too high! #endif #define PREEMPT_SHIFT 0 #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) +#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) #define __IRQ_MASK(x) ((1UL << (x))-1) #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) +#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) +#define NMI_OFFSET (1UL << NMI_SHIFT) -#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS)) +#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) #error PREEMPT_ACTIVE is too low! #endif #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #define softirq_count() (preempt_count() & SOFTIRQ_MASK) -#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)) +#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ + | NMI_MASK)) /* * Are we doing bottom half or hardware interrupt processing? @@ -73,6 +79,11 @@ #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) +/* + * Are we in NMI context? + */ +#define in_nmi() (preempt_count() & NMI_MASK) + #if defined(CONFIG_PREEMPT) # define PREEMPT_INATOMIC_BASE kernel_locked() # define PREEMPT_CHECK_OFFSET 1 @@ -164,20 +175,24 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() \ - do { \ - ftrace_nmi_enter(); \ - lockdep_off(); \ - rcu_nmi_enter(); \ - __irq_enter(); \ +#define nmi_enter() \ + do { \ + ftrace_nmi_enter(); \ + BUG_ON(in_nmi()); \ + add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + lockdep_off(); \ + rcu_nmi_enter(); \ + trace_hardirq_enter(); \ } while (0) -#define nmi_exit() \ - do { \ - __irq_exit(); \ - rcu_nmi_exit(); \ - lockdep_on(); \ - ftrace_nmi_exit(); \ +#define nmi_exit() \ + do { \ + trace_hardirq_exit(); \ + rcu_nmi_exit(); \ + lockdep_on(); \ + BUG_ON(!in_nmi()); \ + sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + ftrace_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c68bffd182b..ce2c07d99fc 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -278,6 +278,11 @@ enum NR_SOFTIRQS }; +/* map softirq index to softirq name. update 'softirq_to_name' in + * kernel/softirq.c when adding a new softirq. + */ +extern char *softirq_to_name[NR_SOFTIRQS]; + /* softirq mask and active fields moved to irq_cpustat_t in * asm/hardirq.h to get better cache usage. KAO */ diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index f3fe34391d8..792274269f2 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -13,10 +13,17 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) +struct module; + #ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. */ unsigned long kallsyms_lookup_name(const char *name); +/* Call a function on each kallsyms symbol in the core kernel */ +int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, + unsigned long), + void *data); + extern int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset); @@ -43,6 +50,14 @@ static inline unsigned long kallsyms_lookup_name(const char *name) return 0; } +static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, + unsigned long), + void *data) +{ + return 0; +} + static inline int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 556d781e69f..d9e75ec7def 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -242,6 +242,19 @@ extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); + +/* + * Print a one-time message (analogous to WARN_ONCE() et al): + */ +#define printk_once(x...) ({ \ + static int __print_once = 1; \ + \ + if (__print_once) { \ + __print_once = 0; \ + printk(x); \ + } \ +}) + void log_buf_kexec_setup(void); #else static inline int vprintk(const char *s, va_list args) @@ -254,6 +267,10 @@ static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ unsigned int interval_msec) \ { return false; } + +/* No effect, but we still get type checking even in the !PRINTK case: */ +#define printk_once(x...) printk(x) + static inline void log_buf_kexec_setup(void) { } @@ -375,6 +392,139 @@ static inline char *pack_hex_byte(char *buf, u8 byte) #endif /* + * General tracing related utility functions - trace_printk(), + * tracing_on/tracing_off and tracing_start()/tracing_stop + * + * Use tracing_on/tracing_off when you want to quickly turn on or off + * tracing. It simply enables or disables the recording of the trace events. + * This also corresponds to the user space debugfs/tracing/tracing_on + * file, which gives a means for the kernel and userspace to interact. + * Place a tracing_off() in the kernel where you want tracing to end. + * From user space, examine the trace, and then echo 1 > tracing_on + * to continue tracing. + * + * tracing_stop/tracing_start has slightly more overhead. It is used + * by things like suspend to ram where disabling the recording of the + * trace is not enough, but tracing must actually stop because things + * like calling smp_processor_id() may crash the system. + * + * Most likely, you want to use tracing_on/tracing_off. + */ +#ifdef CONFIG_RING_BUFFER +void tracing_on(void); +void tracing_off(void); +/* trace_off_permanent stops recording with no way to bring it back */ +void tracing_off_permanent(void); +int tracing_is_on(void); +#else +static inline void tracing_on(void) { } +static inline void tracing_off(void) { } +static inline void tracing_off_permanent(void) { } +static inline int tracing_is_on(void) { return 0; } +#endif +#ifdef CONFIG_TRACING +extern void tracing_start(void); +extern void tracing_stop(void); +extern void ftrace_off_permanent(void); + +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); + +static inline void __attribute__ ((format (printf, 1, 2))) +____trace_printk_check_format(const char *fmt, ...) +{ +} +#define __trace_printk_check_format(fmt, args...) \ +do { \ + if (0) \ + ____trace_printk_check_format(fmt, ##args); \ +} while (0) + +/** + * trace_printk - printf formatting in the ftrace buffer + * @fmt: the printf format for printing + * + * Note: __trace_printk is an internal function for trace_printk and + * the @ip is passed in via the trace_printk macro. + * + * This function allows a kernel developer to debug fast path sections + * that printk is not appropriate for. By scattering in various + * printk like tracing in the code, a developer can quickly see + * where problems are occurring. + * + * This is intended as a debugging tool for the developer only. + * Please refrain from leaving trace_printks scattered around in + * your code. + */ + +#define trace_printk(fmt, args...) \ +do { \ + __trace_printk_check_format(fmt, ##args); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(_THIS_IP_, fmt, ##args); \ +} while (0) + +extern int +__trace_bprintk(unsigned long ip, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); + +extern int +__trace_printk(unsigned long ip, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); + +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement. + */ +#define ftrace_vprintk(fmt, vargs) \ +do { \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ + } else \ + __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ +} while (0) + +extern int +__ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); + +extern int +__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); + +extern void ftrace_dump(void); +#else +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +static inline int +trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); + +static inline void tracing_start(void) { } +static inline void tracing_stop(void) { } +static inline void ftrace_off_permanent(void) { } +static inline int +trace_printk(const char *fmt, ...) +{ + return 0; +} +static inline int +ftrace_vprintk(const char *fmt, va_list ap) +{ + return 0; +} +static inline void ftrace_dump(void) { } +#endif /* CONFIG_TRACING */ + +/* * Display an IP address in readable format. */ diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 92213a9194e..d5fa565086d 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -29,10 +29,15 @@ #ifdef CONFIG_MODULES /* modprobe exit status on success, -ve on error. Return value * usually useless though. */ -extern int request_module(const char * name, ...) __attribute__ ((format (printf, 1, 2))); -#define try_then_request_module(x, mod...) ((x) ?: (request_module(mod), (x))) +extern int __request_module(bool wait, const char *name, ...) \ + __attribute__((format(printf, 2, 3))); +#define request_module(mod...) __request_module(true, mod) +#define request_module_nowait(mod...) __request_module(false, mod) +#define try_then_request_module(x, mod...) \ + ((x) ?: (__request_module(false, mod), (x))) #else -static inline int request_module(const char * name, ...) { return -ENOSYS; } +static inline int request_module(const char *name, ...) { return -ENOSYS; } +static inline int request_module_nowait(const char *name, ...) { return -ENOSYS; } #define try_then_request_module(x, mod...) (x) #endif diff --git a/include/linux/memory.h b/include/linux/memory.h index 42767d1a62e..37fa19b34ef 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -110,4 +110,10 @@ struct memory_accessor { off_t offset, size_t count); }; +/* + * Kernel text modification mutex, used for code patching. Users of this lock + * can sleep. + */ +extern struct mutex text_mutex; + #endif /* _LINUX_MEMORY_H_ */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 26ef24076b7..186ec6ab334 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -764,12 +764,6 @@ extern int numa_zonelist_order_handler(struct ctl_table *, int, extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ -#include <linux/topology.h> -/* Returns the number of the current Node. */ -#ifndef numa_node_id -#define numa_node_id() (cpu_to_node(raw_smp_processor_id())) -#endif - #ifndef CONFIG_NEED_MULTIPLE_NODES extern struct pglist_data contig_page_data; diff --git a/include/linux/module.h b/include/linux/module.h index 145a75528cc..627ac082e2a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -248,6 +248,10 @@ struct module const unsigned long *crcs; unsigned int num_syms; + /* Kernel parameters. */ + struct kernel_param *kp; + unsigned int num_kp; + /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; @@ -329,6 +333,11 @@ struct module unsigned int num_tracepoints; #endif +#ifdef CONFIG_TRACING + const char **trace_bprintk_fmt_start; + unsigned int num_trace_bprintk_fmt; +#endif + #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ struct list_head modules_which_use_me; @@ -350,6 +359,8 @@ struct module #define MODULE_ARCH_INIT {} #endif +extern struct mutex module_mutex; + /* FIXME: It'd be nice to isolate modules during init, too, so they aren't used before they (may) fail. But presently too much code (IDE & SCSI) require entry into the module during init.*/ @@ -358,10 +369,10 @@ static inline int module_is_live(struct module *mod) return mod->state != MODULE_STATE_GOING; } -/* Is this address in a module? (second is with no locks, for oops) */ -struct module *module_text_address(unsigned long addr); struct module *__module_text_address(unsigned long addr); -int is_module_address(unsigned long addr); +struct module *__module_address(unsigned long addr); +bool is_module_address(unsigned long addr); +bool is_module_text_address(unsigned long addr); static inline int within_module_core(unsigned long addr, struct module *mod) { @@ -375,6 +386,31 @@ static inline int within_module_init(unsigned long addr, struct module *mod) addr < (unsigned long)mod->module_init + mod->init_size; } +/* Search for module by name: must hold module_mutex. */ +struct module *find_module(const char *name); + +struct symsearch { + const struct kernel_symbol *start, *stop; + const unsigned long *crcs; + enum { + NOT_GPL_ONLY, + GPL_ONLY, + WILL_BE_GPL_ONLY, + } licence; + bool unused; +}; + +/* Search for an exported symbol by name. */ +const struct kernel_symbol *find_symbol(const char *name, + struct module **owner, + const unsigned long **crc, + bool gplok, + bool warn); + +/* Walk the exported symbol table */ +bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, + unsigned int symnum, void *data), void *data); + /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, @@ -383,6 +419,10 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name); +int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, unsigned long), + void *data); + extern void __module_put_and_exit(struct module *mod, long code) __attribute__((noreturn)); #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code); @@ -444,6 +484,7 @@ static inline void __module_get(struct module *module) #define symbol_put_addr(p) do { } while(0) #endif /* CONFIG_MODULE_UNLOAD */ +int use_module(struct module *a, struct module *b); /* This is a #define so the string doesn't get put in every .o file */ #define module_name(mod) \ @@ -490,21 +531,24 @@ search_module_extables(unsigned long addr) return NULL; } -/* Is this address in a module? */ -static inline struct module *module_text_address(unsigned long addr) +static inline struct module *__module_address(unsigned long addr) { return NULL; } -/* Is this address in a module? (don't take a lock, we're oopsing) */ static inline struct module *__module_text_address(unsigned long addr) { return NULL; } -static inline int is_module_address(unsigned long addr) +static inline bool is_module_address(unsigned long addr) { - return 0; + return false; +} + +static inline bool is_module_text_address(unsigned long addr) +{ + return false; } /* Get/put a kernel symbol (calls should be symmetric) */ @@ -559,6 +603,14 @@ static inline unsigned long module_kallsyms_lookup_name(const char *name) return 0; } +static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, + unsigned long), + void *data) +{ + return 0; +} + static inline int register_module_notifier(struct notifier_block * nb) { /* no events will happen anyway, so this can always succeed */ diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index e4af3399ef4..a4f0b931846 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -138,6 +138,16 @@ extern int parse_args(const char *name, unsigned num, int (*unknown)(char *param, char *val)); +/* Called by module remove. */ +#ifdef CONFIG_SYSFS +extern void destroy_params(const struct kernel_param *params, unsigned num); +#else +static inline void destroy_params(const struct kernel_param *params, + unsigned num) +{ +} +#endif /* !CONFIG_SYSFS */ + /* All the helper functions */ /* The macros to do compile-time type checking stolen from Jakub Jelinek, who IIRC came up with this idea for the 2.4 module init code. */ diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index b3b35966008..e1b7b217388 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -8,7 +8,7 @@ struct ring_buffer; struct ring_buffer_iter; /* - * Don't reference this struct directly, use functions below. + * Don't refer to this struct directly, use functions below. */ struct ring_buffer_event { u32 type:2, len:3, time_delta:27; @@ -18,10 +18,13 @@ struct ring_buffer_event { /** * enum ring_buffer_type - internal ring buffer types * - * @RINGBUF_TYPE_PADDING: Left over page padding - * array is ignored - * size is variable depending on how much + * @RINGBUF_TYPE_PADDING: Left over page padding or discarded event + * If time_delta is 0: + * array is ignored + * size is variable depending on how much * padding is needed + * If time_delta is non zero: + * everything else same as RINGBUF_TYPE_DATA * * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta * array[0] = time delta (28 .. 59) @@ -65,6 +68,8 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } +void ring_buffer_event_discard(struct ring_buffer_event *event); + /* * size is in bytes for each per CPU buffer. */ @@ -74,13 +79,10 @@ void ring_buffer_free(struct ring_buffer *buffer); int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); -struct ring_buffer_event * -ring_buffer_lock_reserve(struct ring_buffer *buffer, - unsigned long length, - unsigned long *flags); +struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer, + unsigned long length); int ring_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags); + struct ring_buffer_event *event); int ring_buffer_write(struct ring_buffer *buffer, unsigned long length, void *data); @@ -121,17 +123,19 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); -u64 ring_buffer_time_stamp(int cpu); -void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); +u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); +void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, + int cpu, u64 *ts); +void ring_buffer_set_clock(struct ring_buffer *buffer, + u64 (*clock)(void)); + +size_t ring_buffer_page_len(void *page); -void tracing_on(void); -void tracing_off(void); -void tracing_off_permanent(void); void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); -int ring_buffer_read_page(struct ring_buffer *buffer, - void **data_page, int cpu, int full); +int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, + size_t len, int cpu, int full); enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, diff --git a/include/linux/sched.h b/include/linux/sched.h index 9da5aa0771e..b94f3541f67 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -138,6 +138,8 @@ extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); +extern unsigned long get_parent_ip(unsigned long addr); + struct seq_file; struct cfs_rq; struct task_group; @@ -1405,6 +1407,8 @@ struct task_struct { int curr_ret_stack; /* Stack of return addresses for return function tracing */ struct ftrace_ret_stack *ret_stack; + /* time stamp for last schedule */ + unsigned long long ftrace_timestamp; /* * Number of functions that haven't been traced * because of depth overrun. diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 6ca6a7b66d7..f4523651fa4 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,6 +14,7 @@ #include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ #include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include <linux/compiler.h> +#include <trace/kmemtrace.h> /* Size description struct for general caches. */ struct cache_sizes { @@ -28,8 +29,26 @@ extern struct cache_sizes malloc_sizes[]; void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); -static inline void *kmalloc(size_t size, gfp_t flags) +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags); +extern size_t slab_buffer_size(struct kmem_cache *cachep); +#else +static __always_inline void * +kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags) { + return kmem_cache_alloc(cachep, flags); +} +static inline size_t slab_buffer_size(struct kmem_cache *cachep) +{ + return 0; +} +#endif + +static __always_inline void *kmalloc(size_t size, gfp_t flags) +{ + struct kmem_cache *cachep; + void *ret; + if (__builtin_constant_p(size)) { int i = 0; @@ -47,10 +66,17 @@ static inline void *kmalloc(size_t size, gfp_t flags) found: #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) - return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep, - flags); + cachep = malloc_sizes[i].cs_dmacachep; + else #endif - return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags); + cachep = malloc_sizes[i].cs_cachep; + + ret = kmem_cache_alloc_notrace(cachep, flags); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret, + size, slab_buffer_size(cachep), flags); + + return ret; } return __kmalloc(size, flags); } @@ -59,8 +85,25 @@ found: extern void *__kmalloc_node(size_t size, gfp_t flags, int node); extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); -static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, + gfp_t flags, + int nodeid); +#else +static __always_inline void * +kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, + gfp_t flags, + int nodeid) +{ + return kmem_cache_alloc_node(cachep, flags, nodeid); +} +#endif + +static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { + struct kmem_cache *cachep; + void *ret; + if (__builtin_constant_p(size)) { int i = 0; @@ -78,11 +121,18 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) found: #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) - return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep, - flags, node); + cachep = malloc_sizes[i].cs_dmacachep; + else #endif - return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep, - flags, node); + cachep = malloc_sizes[i].cs_cachep; + + ret = kmem_cache_alloc_node_notrace(cachep, flags, node); + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, + ret, size, slab_buffer_size(cachep), + flags, node); + + return ret; } return __kmalloc_node(size, flags, node); } diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index 59a3fa476ab..0ec00b39d00 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -3,14 +3,15 @@ void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); -static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) +static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep, + gfp_t flags) { return kmem_cache_alloc_node(cachep, flags, -1); } void *__kmalloc_node(size_t size, gfp_t flags, int node); -static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { return __kmalloc_node(size, flags, node); } @@ -23,12 +24,12 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) * kmalloc is the normal method of allocating memory * in the kernel. */ -static inline void *kmalloc(size_t size, gfp_t flags) +static __always_inline void *kmalloc(size_t size, gfp_t flags) { return __kmalloc_node(size, flags, -1); } -static inline void *__kmalloc(size_t size, gfp_t flags) +static __always_inline void *__kmalloc(size_t size, gfp_t flags) { return kmalloc(size, flags); } diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index e37b6aa8a9f..a1f90528e70 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,6 +10,7 @@ #include <linux/gfp.h> #include <linux/workqueue.h> #include <linux/kobject.h> +#include <trace/kmemtrace.h> enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ @@ -217,13 +218,31 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size) void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags); +#else +static __always_inline void * +kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) +{ + return kmem_cache_alloc(s, gfpflags); +} +#endif + static __always_inline void *kmalloc_large(size_t size, gfp_t flags) { - return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size)); + unsigned int order = get_order(size); + void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret, + size, PAGE_SIZE << order, flags); + + return ret; } static __always_inline void *kmalloc(size_t size, gfp_t flags) { + void *ret; + if (__builtin_constant_p(size)) { if (size > SLUB_MAX_SIZE) return kmalloc_large(size, flags); @@ -234,7 +253,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) if (!s) return ZERO_SIZE_PTR; - return kmem_cache_alloc(s, flags); + ret = kmem_cache_alloc_notrace(s, flags); + + kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, + _THIS_IP_, ret, + size, s->size, flags); + + return ret; } } return __kmalloc(size, flags); @@ -244,8 +269,24 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) void *__kmalloc_node(size_t size, gfp_t flags, int node); void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); +#ifdef CONFIG_KMEMTRACE +extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, + gfp_t gfpflags, + int node); +#else +static __always_inline void * +kmem_cache_alloc_node_notrace(struct kmem_cache *s, + gfp_t gfpflags, + int node) +{ + return kmem_cache_alloc_node(s, gfpflags, node); +} +#endif + static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { + void *ret; + if (__builtin_constant_p(size) && size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) { struct kmem_cache *s = kmalloc_slab(size); @@ -253,7 +294,13 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) if (!s) return ZERO_SIZE_PTR; - return kmem_cache_alloc_node(s, flags, node); + ret = kmem_cache_alloc_node_notrace(s, flags, node); + + kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, + _THIS_IP_, ret, + size, s->size, flags, node); + + return ret; } return __kmalloc_node(size, flags, node); } diff --git a/include/linux/string.h b/include/linux/string.h index 8852739f36d..489019ef169 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -10,6 +10,7 @@ #include <linux/compiler.h> /* for inline */ #include <linux/types.h> /* for size_t */ #include <linux/stddef.h> /* for NULL */ +#include <stdarg.h> extern char *strndup_user(const char __user *, long); extern void *memdup_user(const void __user *, size_t); @@ -112,8 +113,23 @@ extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); +#ifdef CONFIG_BINARY_PRINTF +int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); +int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); +int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4); +#endif + extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, const void *from, size_t available); +/** + * strstarts - does @str start with @prefix? + * @str: string to examine + * @prefix: prefix to look for. + */ +static inline bool strstarts(const char *str, const char *prefix) +{ + return strncmp(str, prefix, strlen(prefix)) == 0; +} #endif #endif /* _LINUX_STRING_H_ */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b299a82a05e..6470f74074a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -65,6 +65,7 @@ struct old_linux_dirent; #include <asm/signal.h> #include <linux/quota.h> #include <linux/key.h> +#include <linux/ftrace.h> #define __SC_DECL1(t1, a1) t1 a1 #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__) @@ -95,7 +96,46 @@ struct old_linux_dirent; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) +#ifdef CONFIG_FTRACE_SYSCALLS +#define __SC_STR_ADECL1(t, a) #a +#define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__) +#define __SC_STR_ADECL3(t, a, ...) #a, __SC_STR_ADECL2(__VA_ARGS__) +#define __SC_STR_ADECL4(t, a, ...) #a, __SC_STR_ADECL3(__VA_ARGS__) +#define __SC_STR_ADECL5(t, a, ...) #a, __SC_STR_ADECL4(__VA_ARGS__) +#define __SC_STR_ADECL6(t, a, ...) #a, __SC_STR_ADECL5(__VA_ARGS__) + +#define __SC_STR_TDECL1(t, a) #t +#define __SC_STR_TDECL2(t, a, ...) #t, __SC_STR_TDECL1(__VA_ARGS__) +#define __SC_STR_TDECL3(t, a, ...) #t, __SC_STR_TDECL2(__VA_ARGS__) +#define __SC_STR_TDECL4(t, a, ...) #t, __SC_STR_TDECL3(__VA_ARGS__) +#define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) +#define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) + +#define SYSCALL_METADATA(sname, nb) \ + static const struct syscall_metadata __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("__syscalls_metadata"))) \ + __syscall_meta_##sname = { \ + .name = "sys"#sname, \ + .nb_args = nb, \ + .types = types_##sname, \ + .args = args_##sname, \ + } + +#define SYSCALL_DEFINE0(sname) \ + static const struct syscall_metadata __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("__syscalls_metadata"))) \ + __syscall_meta_##sname = { \ + .name = "sys_"#sname, \ + .nb_args = 0, \ + }; \ + asmlinkage long sys_##sname(void) + +#else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) +#endif + #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) @@ -117,10 +157,26 @@ struct old_linux_dirent; #endif #endif +#ifdef CONFIG_FTRACE_SYSCALLS +#define SYSCALL_DEFINEx(x, sname, ...) \ + static const char *types_##sname[] = { \ + __SC_STR_TDECL##x(__VA_ARGS__) \ + }; \ + static const char *args_##sname[] = { \ + __SC_STR_ADECL##x(__VA_ARGS__) \ + }; \ + SYSCALL_METADATA(sname, x); \ + __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) +#else +#define SYSCALL_DEFINEx(x, sname, ...) \ + __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) +#endif + #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS #define SYSCALL_DEFINE(name) static inline long SYSC_##name -#define SYSCALL_DEFINEx(x, name, ...) \ + +#define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \ static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \ @@ -134,7 +190,7 @@ struct old_linux_dirent; #else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ #define SYSCALL_DEFINE(name) asmlinkage long sys_##name -#define SYSCALL_DEFINEx(x, name, ...) \ +#define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)) #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ @@ -462,9 +518,9 @@ asmlinkage long sys_pread64(unsigned int fd, char __user *buf, asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, u32 pos_high, u32 pos_low); + unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, u32 pos_high, u32 pos_low); + unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_mkdir(const char __user *pathname, int mode); asmlinkage long sys_chdir(const char __user *filename); diff --git a/include/linux/topology.h b/include/linux/topology.h index a16b9e06f2e..7402c1a27c4 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -38,11 +38,7 @@ #endif #ifndef nr_cpus_node -#define nr_cpus_node(node) \ - ({ \ - node_to_cpumask_ptr(__tmp__, node); \ - cpus_weight(*__tmp__); \ - }) +#define nr_cpus_node(node) cpumask_weight(cpumask_of_node(node)) #endif #define for_each_node_with_cpus(node) \ @@ -200,4 +196,9 @@ int arch_update_cpu_topology(void); #define topology_core_cpumask(cpu) cpumask_of(cpu) #endif +/* Returns the number of the current Node. */ +#ifndef numa_node_id +#define numa_node_id() (cpu_to_node(raw_smp_processor_id())) +#endif + #endif /* _LINUX_TOPOLOGY_H */ diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h new file mode 100644 index 00000000000..7a813038408 --- /dev/null +++ b/include/linux/trace_clock.h @@ -0,0 +1,19 @@ +#ifndef _LINUX_TRACE_CLOCK_H +#define _LINUX_TRACE_CLOCK_H + +/* + * 3 trace clock variants, with differing scalability/precision + * tradeoffs: + * + * - local: CPU-local trace clock + * - medium: scalable global clock with some jitter + * - global: globally monotonic, serialized clock + */ +#include <linux/compiler.h> +#include <linux/types.h> + +extern u64 notrace trace_clock_local(void); +extern u64 notrace trace_clock(void); +extern u64 notrace trace_clock_global(void); + +#endif /* _LINUX_TRACE_CLOCK_H */ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 75700545836..d35a7ee7611 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -31,8 +31,8 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ -#define TPPROTO(args...) args -#define TPARGS(args...) args +#define TP_PROTO(args...) args +#define TP_ARGS(args...) args #ifdef CONFIG_TRACEPOINTS @@ -65,7 +65,7 @@ struct tracepoint { { \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ - TPPROTO(proto), TPARGS(args)); \ + TP_PROTO(proto), TP_ARGS(args)); \ } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ @@ -153,4 +153,114 @@ static inline void tracepoint_synchronize_unregister(void) synchronize_sched(); } +#define PARAMS(args...) args +#define TRACE_FORMAT(name, proto, args, fmt) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + + +/* + * For use with the TRACE_EVENT macro: + * + * We define a tracepoint, its arguments, its printk format + * and its 'fast binay record' layout. + * + * Firstly, name your tracepoint via TRACE_EVENT(name : the + * 'subsystem_event' notation is fine. + * + * Think about this whole construct as the + * 'trace_sched_switch() function' from now on. + * + * + * TRACE_EVENT(sched_switch, + * + * * + * * A function has a regular function arguments + * * prototype, declare it via TP_PROTO(): + * * + * + * TP_PROTO(struct rq *rq, struct task_struct *prev, + * struct task_struct *next), + * + * * + * * Define the call signature of the 'function'. + * * (Design sidenote: we use this instead of a + * * TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.) + * * + * + * TP_ARGS(rq, prev, next), + * + * * + * * Fast binary tracing: define the trace record via + * * TP_STRUCT__entry(). You can think about it like a + * * regular C structure local variable definition. + * * + * * This is how the trace record is structured and will + * * be saved into the ring buffer. These are the fields + * * that will be exposed to user-space in + * * /debug/tracing/events/<*>/format. + * * + * * The declared 'local variable' is called '__entry' + * * + * * __field(pid_t, prev_prid) is equivalent to a standard declariton: + * * + * * pid_t prev_pid; + * * + * * __array(char, prev_comm, TASK_COMM_LEN) is equivalent to: + * * + * * char prev_comm[TASK_COMM_LEN]; + * * + * + * TP_STRUCT__entry( + * __array( char, prev_comm, TASK_COMM_LEN ) + * __field( pid_t, prev_pid ) + * __field( int, prev_prio ) + * __array( char, next_comm, TASK_COMM_LEN ) + * __field( pid_t, next_pid ) + * __field( int, next_prio ) + * ), + * + * * + * * Assign the entry into the trace record, by embedding + * * a full C statement block into TP_fast_assign(). You + * * can refer to the trace record as '__entry' - + * * otherwise you can put arbitrary C code in here. + * * + * * Note: this C code will execute every time a trace event + * * happens, on an active tracepoint. + * * + * + * TP_fast_assign( + * memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + * __entry->prev_pid = prev->pid; + * __entry->prev_prio = prev->prio; + * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + * __entry->next_pid = next->pid; + * __entry->next_prio = next->prio; + * ) + * + * * + * * Formatted output of a trace record via TP_printk(). + * * This is how the tracepoint will appear under ftrace + * * plugins that make use of this tracepoint. + * * + * * (raw-binary tracing wont actually perform this step.) + * * + * + * TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + * __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + * __entry->next_comm, __entry->next_pid, __entry->next_prio), + * + * ); + * + * This macro construct is thus used for the regular printk format + * tracing setup, it is used to construct a function pointer based + * tracepoint callback (this is used by programmatic plugins and + * can also by used by generic instrumentation like SystemTap), and + * it is also used to expose a structured trace record in + * /debug/tracing/events/. + */ + +#define TRACE_EVENT(name, proto, args, struct, assign, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + #endif |