aboutsummaryrefslogtreecommitdiff
path: root/include/linux/sched.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--include/linux/sched.h124
1 files changed, 98 insertions, 26 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d9120c5ad1..8478f334d73 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -287,7 +287,6 @@ extern void trap_init(void);
extern void account_process_tick(struct task_struct *task, int user);
extern void update_process_times(int user);
extern void scheduler_tick(void);
-extern void hrtick_resched(void);
extern void sched_show_task(struct task_struct *p);
@@ -352,7 +351,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
extern void arch_unmap_area(struct mm_struct *, unsigned long);
extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#if USE_SPLIT_PTLOCKS
/*
* The mm counters are not protected by its page_table_lock,
* so must be incremented atomically.
@@ -363,7 +362,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
-#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#else /* !USE_SPLIT_PTLOCKS */
/*
* The mm counters are protected by its page_table_lock,
* so can be incremented directly.
@@ -374,7 +373,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
#define inc_mm_counter(mm, member) (mm)->_##member++
#define dec_mm_counter(mm, member) (mm)->_##member--
-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#endif /* !USE_SPLIT_PTLOCKS */
#define get_mm_rss(mm) \
(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
@@ -403,12 +402,21 @@ extern int get_dumpable(struct mm_struct *mm);
#define MMF_DUMP_MAPPED_PRIVATE 4
#define MMF_DUMP_MAPPED_SHARED 5
#define MMF_DUMP_ELF_HEADERS 6
+#define MMF_DUMP_HUGETLB_PRIVATE 7
+#define MMF_DUMP_HUGETLB_SHARED 8
#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
-#define MMF_DUMP_FILTER_BITS 5
+#define MMF_DUMP_FILTER_BITS 7
#define MMF_DUMP_FILTER_MASK \
(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
#define MMF_DUMP_FILTER_DEFAULT \
- ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED))
+ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\
+ (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
+
+#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
+# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS)
+#else
+# define MMF_DUMP_MASK_DEFAULT_ELF 0
+#endif
struct sighand_struct {
atomic_t count;
@@ -425,6 +433,39 @@ struct pacct_struct {
unsigned long ac_minflt, ac_majflt;
};
+/**
+ * struct task_cputime - collected CPU time counts
+ * @utime: time spent in user mode, in &cputime_t units
+ * @stime: time spent in kernel mode, in &cputime_t units
+ * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
+ *
+ * This structure groups together three kinds of CPU time that are
+ * tracked for threads and thread groups. Most things considering
+ * CPU time want to group these counts together and treat all three
+ * of them in parallel.
+ */
+struct task_cputime {
+ cputime_t utime;
+ cputime_t stime;
+ unsigned long long sum_exec_runtime;
+};
+/* Alternate field names when used to cache expirations. */
+#define prof_exp stime
+#define virt_exp utime
+#define sched_exp sum_exec_runtime
+
+/**
+ * struct thread_group_cputime - thread group interval timer counts
+ * @totals: thread group interval timers; substructure for
+ * uniprocessor kernel, per-cpu for SMP kernel.
+ *
+ * This structure contains the version of task_cputime, above, that is
+ * used for thread group CPU clock calculations.
+ */
+struct thread_group_cputime {
+ struct task_cputime *totals;
+};
+
/*
* NOTE! "signal_struct" does not have it's own
* locking, because a shared signal_struct always
@@ -451,8 +492,8 @@ struct signal_struct {
* - everyone except group_exit_task is stopped during signal delivery
* of fatal signals, group_exit_task processes the signal.
*/
- struct task_struct *group_exit_task;
int notify_count;
+ struct task_struct *group_exit_task;
/* thread group stop support, overloads group_exit_code too */
int group_stop_count;
@@ -470,6 +511,17 @@ struct signal_struct {
cputime_t it_prof_expires, it_virt_expires;
cputime_t it_prof_incr, it_virt_incr;
+ /*
+ * Thread group totals for process CPU clocks.
+ * See thread_group_cputime(), et al, for details.
+ */
+ struct thread_group_cputime cputime;
+
+ /* Earliest-expiration cache. */
+ struct task_cputime cputime_expires;
+
+ struct list_head cpu_timers[3];
+
/* job control IDs */
/*
@@ -500,7 +552,7 @@ struct signal_struct {
* Live threads maintain their own counters and add to these
* in __exit_signal, except for the group leader.
*/
- cputime_t utime, stime, cutime, cstime;
+ cputime_t cutime, cstime;
cputime_t gtime;
cputime_t cgtime;
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -509,14 +561,6 @@ struct signal_struct {
struct task_io_accounting ioac;
/*
- * Cumulative ns of scheduled CPU time for dead threads in the
- * group, not including a zombie group leader. (This only differs
- * from jiffies_to_ns(utime + stime) if sched_clock uses something
- * other than jiffies.)
- */
- unsigned long long sum_sched_runtime;
-
- /*
* We don't bother to synchronize most readers of this at all,
* because there is no reader checking a limit that actually needs
* to get both rlim_cur and rlim_max atomically, and either one
@@ -527,8 +571,6 @@ struct signal_struct {
*/
struct rlimit rlim[RLIM_NLIMITS];
- struct list_head cpu_timers[3];
-
/* keep the process-shared keyrings here so that they do the right
* thing in threads created with CLONE_THREAD */
#ifdef CONFIG_KEYS
@@ -638,10 +680,6 @@ struct sched_info {
};
#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
-#ifdef CONFIG_SCHEDSTATS
-extern const struct file_operations proc_schedstat_operations;
-#endif /* CONFIG_SCHEDSTATS */
-
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info {
spinlock_t lock;
@@ -824,6 +862,9 @@ struct sched_domain {
unsigned int ttwu_move_affine;
unsigned int ttwu_move_balance;
#endif
+#ifdef CONFIG_SCHED_DEBUG
+ char *name;
+#endif
};
extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
@@ -897,7 +938,7 @@ struct sched_class {
void (*yield_task) (struct rq *rq);
int (*select_task_rq)(struct task_struct *p, int sync);
- void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
+ void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
struct task_struct * (*pick_next_task) (struct rq *rq);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
@@ -1010,8 +1051,8 @@ struct sched_entity {
struct sched_rt_entity {
struct list_head run_list;
- unsigned int time_slice;
unsigned long timeout;
+ unsigned int time_slice;
int nr_cpus_allowed;
struct sched_rt_entity *back;
@@ -1134,8 +1175,7 @@ struct task_struct {
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;
- cputime_t it_prof_expires, it_virt_expires;
- unsigned long long it_sched_expires;
+ struct task_cputime cputime_expires;
struct list_head cpu_timers[3];
/* process credentials */
@@ -1301,6 +1341,12 @@ struct task_struct {
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
#endif
+ /*
+ * time slack values; these are used to round up poll() and
+ * select() etc timeout values. These are in nanoseconds.
+ */
+ unsigned long timer_slack_ns;
+ unsigned long default_timer_slack_ns;
};
/*
@@ -1585,6 +1631,7 @@ extern unsigned long long cpu_clock(int cpu);
extern unsigned long long
task_sched_runtime(struct task_struct *task);
+extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
/* sched_exec is called by processes performing an exec */
#ifdef CONFIG_SMP
@@ -1619,6 +1666,7 @@ extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_shares_ratelimit;
+extern unsigned int sysctl_sched_shares_thresh;
int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length,
@@ -2082,6 +2130,30 @@ static inline int spin_needbreak(spinlock_t *lock)
}
/*
+ * Thread group CPU time accounting.
+ */
+
+extern int thread_group_cputime_alloc(struct task_struct *);
+extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
+
+static inline void thread_group_cputime_init(struct signal_struct *sig)
+{
+ sig->cputime.totals = NULL;
+}
+
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
+{
+ if (curr->signal->cputime.totals)
+ return 0;
+ return thread_group_cputime_alloc(curr);
+}
+
+static inline void thread_group_cputime_free(struct signal_struct *sig)
+{
+ free_percpu(sig->cputime.totals);
+}
+
+/*
* Reevaluate whether the task has signals pending delivery.
* Wake the task if so.
* This is required every time the blocked sigset_t changes.