This source file includes the following definitions:
- sched_rt_period_timer
- init_rt_bandwidth
- start_rt_bandwidth
- init_rt_rq
- destroy_rt_bandwidth
- rt_task_of
- rq_of_rt_rq
- rt_rq_of_se
- rq_of_rt_se
- free_rt_sched_group
- init_tg_rt_entry
- alloc_rt_sched_group
- rt_task_of
- rq_of_rt_rq
- rq_of_rt_se
- rt_rq_of_se
- free_rt_sched_group
- alloc_rt_sched_group
- need_pull_rt_task
- rt_overloaded
- rt_set_overload
- rt_clear_overload
- update_rt_migration
- inc_rt_migration
- dec_rt_migration
- has_pushable_tasks
- rt_queue_push_tasks
- rt_queue_pull_task
- enqueue_pushable_task
- dequeue_pushable_task
- enqueue_pushable_task
- dequeue_pushable_task
- inc_rt_migration
- dec_rt_migration
- need_pull_rt_task
- pull_rt_task
- rt_queue_push_tasks
- on_rt_rq
- sched_rt_runtime
- sched_rt_period
- next_task_group
- group_rt_rq
- sched_rt_rq_enqueue
- sched_rt_rq_dequeue
- rt_rq_throttled
- rt_se_boosted
- sched_rt_period_mask
- sched_rt_period_mask
- sched_rt_period_rt_rq
- sched_rt_bandwidth
- sched_rt_runtime
- sched_rt_period
- group_rt_rq
- sched_rt_rq_enqueue
- sched_rt_rq_dequeue
- rt_rq_throttled
- sched_rt_period_mask
- sched_rt_period_rt_rq
- sched_rt_bandwidth
- sched_rt_bandwidth_account
- do_balance_runtime
- __disable_runtime
- __enable_runtime
- balance_runtime
- balance_runtime
- do_sched_rt_period_timer
- rt_se_prio
- sched_rt_runtime_exceeded
- update_curr_rt
- dequeue_top_rt_rq
- enqueue_top_rt_rq
- inc_rt_prio_smp
- dec_rt_prio_smp
- inc_rt_prio_smp
- dec_rt_prio_smp
- inc_rt_prio
- dec_rt_prio
- inc_rt_prio
- dec_rt_prio
- inc_rt_group
- dec_rt_group
- inc_rt_group
- dec_rt_group
- rt_se_nr_running
- rt_se_rr_nr_running
- inc_rt_tasks
- dec_rt_tasks
- move_entity
- __delist_rt_entity
- __enqueue_rt_entity
- __dequeue_rt_entity
- dequeue_rt_stack
- enqueue_rt_entity
- dequeue_rt_entity
- enqueue_task_rt
- dequeue_task_rt
- requeue_rt_entity
- requeue_task_rt
- yield_task_rt
- select_task_rq_rt
- check_preempt_equal_prio
- balance_rt
- check_preempt_curr_rt
- set_next_task_rt
- pick_next_rt_entity
- _pick_next_task_rt
- pick_next_task_rt
- put_prev_task_rt
- pick_rt_task
- pick_highest_pushable_task
- find_lowest_rq
- find_lock_lowest_rq
- pick_next_pushable_task
- push_rt_task
- push_rt_tasks
- rto_next_cpu
- rto_start_trylock
- rto_start_unlock
- tell_cpu_to_push
- rto_push_irq_work_func
- pull_rt_task
- task_woken_rt
- rq_online_rt
- rq_offline_rt
- switched_from_rt
- init_sched_rt_class
- switched_to_rt
- prio_changed_rt
- watchdog
- watchdog
- task_tick_rt
- get_rr_interval_rt
- tg_has_rt_tasks
- tg_rt_schedulable
- __rt_schedulable
- tg_set_rt_bandwidth
- sched_group_set_rt_runtime
- sched_group_rt_runtime
- sched_group_set_rt_period
- sched_group_rt_period
- sched_rt_global_constraints
- sched_rt_can_attach
- sched_rt_global_constraints
- sched_rt_global_validate
- sched_rt_do_global
- sched_rt_handler
- sched_rr_handler
- print_rt_stats
// SPDX-License-Identifier: GPL-2.0
/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */
6 #include "sched.h"
7
8 #include "pelt.h"
9
10 int sched_rr_timeslice = RR_TIMESLICE;
11 int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
12
13 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
14
15 struct rt_bandwidth def_rt_bandwidth;
16
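/*
 * Period timer for an rt_bandwidth pool: each time it fires, forward
 * the timer one period at a time and replenish runtime for every
 * elapsed period via do_sched_rt_period_timer(); return NORESTART
 * once all serviced rt_rqs have gone idle.
 */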
17 static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
18 {
19 struct rt_bandwidth *rt_b =
20 container_of(timer, struct rt_bandwidth, rt_period_timer);
21 int idle = 0;
22 int overrun;
23
24 raw_spin_lock(&rt_b->rt_runtime_lock);
25 for (;;) {
26 overrun = hrtimer_forward_now(timer, rt_b->rt_period);
27 if (!overrun)
28 break;
29
30 raw_spin_unlock(&rt_b->rt_runtime_lock);
31 idle = do_sched_rt_period_timer(rt_b, overrun);
32 raw_spin_lock(&rt_b->rt_runtime_lock);
33 }
34 if (idle)
35 rt_b->rt_period_active = 0;
36 raw_spin_unlock(&rt_b->rt_runtime_lock);
37
38 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
39 }
40
41 void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
42 {
43 rt_b->rt_period = ns_to_ktime(period);
44 rt_b->rt_runtime = runtime;
45
46 raw_spin_lock_init(&rt_b->rt_runtime_lock);
47
48 hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
49 HRTIMER_MODE_REL_HARD);
50 rt_b->rt_period_timer.function = sched_rt_period_timer;
51 }
52
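/*
 * Arm the period timer for this bandwidth pool, but only when RT
 * throttling is enabled and the runtime is finite; a no-op if the
 * timer is already active.
 */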
53 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
54 {
55 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
56 return;
57
58 raw_spin_lock(&rt_b->rt_runtime_lock);
59 if (!rt_b->rt_period_active) {
60 rt_b->rt_period_active = 1;
61
/*
 * Kick the timer: forward it to "now" and arm it so the current
 * bandwidth period starts being accounted immediately.
 */
69 hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
70 hrtimer_start_expires(&rt_b->rt_period_timer,
71 HRTIMER_MODE_ABS_PINNED_HARD);
72 }
73 raw_spin_unlock(&rt_b->rt_runtime_lock);
74 }
75
76 void init_rt_rq(struct rt_rq *rt_rq)
77 {
78 struct rt_prio_array *array;
79 int i;
80
81 array = &rt_rq->active;
82 for (i = 0; i < MAX_RT_PRIO; i++) {
83 INIT_LIST_HEAD(array->queue + i);
84 __clear_bit(i, array->bitmap);
85 }
86
87 __set_bit(MAX_RT_PRIO, array->bitmap);
88
89 #if defined CONFIG_SMP
90 rt_rq->highest_prio.curr = MAX_RT_PRIO;
91 rt_rq->highest_prio.next = MAX_RT_PRIO;
92 rt_rq->rt_nr_migratory = 0;
93 rt_rq->overloaded = 0;
94 plist_head_init(&rt_rq->pushable_tasks);
95 #endif
96
97 rt_rq->rt_queued = 0;
98
99 rt_rq->rt_time = 0;
100 rt_rq->rt_throttled = 0;
101 rt_rq->rt_runtime = 0;
102 raw_spin_lock_init(&rt_rq->rt_runtime_lock);
103 }
104
105 #ifdef CONFIG_RT_GROUP_SCHED
106 static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
107 {
108 hrtimer_cancel(&rt_b->rt_period_timer);
109 }
110
111 #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
112
113 static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
114 {
115 #ifdef CONFIG_SCHED_DEBUG
116 WARN_ON_ONCE(!rt_entity_is_task(rt_se));
117 #endif
118 return container_of(rt_se, struct task_struct, rt);
119 }
120
121 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
122 {
123 return rt_rq->rq;
124 }
125
126 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
127 {
128 return rt_se->rt_rq;
129 }
130
131 static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
132 {
133 struct rt_rq *rt_rq = rt_se->rt_rq;
134
135 return rt_rq->rq;
136 }
137
138 void free_rt_sched_group(struct task_group *tg)
139 {
140 int i;
141
142 if (tg->rt_se)
143 destroy_rt_bandwidth(&tg->rt_bandwidth);
144
145 for_each_possible_cpu(i) {
146 if (tg->rt_rq)
147 kfree(tg->rt_rq[i]);
148 if (tg->rt_se)
149 kfree(tg->rt_se[i]);
150 }
151
152 kfree(tg->rt_rq);
153 kfree(tg->rt_se);
154 }
155
156 void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
157 struct sched_rt_entity *rt_se, int cpu,
158 struct sched_rt_entity *parent)
159 {
160 struct rq *rq = cpu_rq(cpu);
161
162 rt_rq->highest_prio.curr = MAX_RT_PRIO;
163 rt_rq->rt_nr_boosted = 0;
164 rt_rq->rq = rq;
165 rt_rq->tg = tg;
166
167 tg->rt_rq[cpu] = rt_rq;
168 tg->rt_se[cpu] = rt_se;
169
170 if (!rt_se)
171 return;
172
173 if (!parent)
174 rt_se->rt_rq = &rq->rt;
175 else
176 rt_se->rt_rq = parent->my_q;
177
178 rt_se->my_q = rt_rq;
179 rt_se->parent = parent;
180 INIT_LIST_HEAD(&rt_se->run_list);
181 }
182
183 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
184 {
185 struct rt_rq *rt_rq;
186 struct sched_rt_entity *rt_se;
187 int i;
188
189 tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
190 if (!tg->rt_rq)
191 goto err;
192 tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
193 if (!tg->rt_se)
194 goto err;
195
196 init_rt_bandwidth(&tg->rt_bandwidth,
197 ktime_to_ns(def_rt_bandwidth.rt_period), 0);
198
199 for_each_possible_cpu(i) {
200 rt_rq = kzalloc_node(sizeof(struct rt_rq),
201 GFP_KERNEL, cpu_to_node(i));
202 if (!rt_rq)
203 goto err;
204
205 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
206 GFP_KERNEL, cpu_to_node(i));
207 if (!rt_se)
208 goto err_free_rq;
209
210 init_rt_rq(rt_rq);
211 rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
212 init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
213 }
214
215 return 1;
216
217 err_free_rq:
218 kfree(rt_rq);
219 err:
220 return 0;
221 }
222
223 #else
224
225 #define rt_entity_is_task(rt_se) (1)
226
227 static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
228 {
229 return container_of(rt_se, struct task_struct, rt);
230 }
231
232 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
233 {
234 return container_of(rt_rq, struct rq, rt);
235 }
236
237 static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
238 {
239 struct task_struct *p = rt_task_of(rt_se);
240
241 return task_rq(p);
242 }
243
244 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
245 {
246 struct rq *rq = rq_of_rt_se(rt_se);
247
248 return &rq->rt;
249 }
250
251 void free_rt_sched_group(struct task_group *tg) { }
252
253 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
254 {
255 return 1;
256 }
257 #endif
258
259 #ifdef CONFIG_SMP
260
261 static void pull_rt_task(struct rq *this_rq);
262
263 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
264 {
265
266 return rq->rt.highest_prio.curr > prev->prio;
267 }
268
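/*
 * RT overload tracking: a runqueue counts as overloaded when it has
 * more than one runnable RT task and at least one of them may migrate.
 * The root domain's rto_mask and rto_count record which CPUs are in
 * that state, so other CPUs know where pulling might help.
 */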
269 static inline int rt_overloaded(struct rq *rq)
270 {
271 return atomic_read(&rq->rd->rto_count);
272 }
273
274 static inline void rt_set_overload(struct rq *rq)
275 {
276 if (!rq->online)
277 return;
278
279 cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
/*
 * Make the rto_mask update visible before rto_count is incremented,
 * so a CPU that observes the count also observes the mask; paired
 * with the smp_rmb() in pull_rt_task().
 */
289 smp_wmb();
290 atomic_inc(&rq->rd->rto_count);
291 }
292
293 static inline void rt_clear_overload(struct rq *rq)
294 {
295 if (!rq->online)
296 return;
297
298
299 atomic_dec(&rq->rd->rto_count);
300 cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
301 }
302
303 static void update_rt_migration(struct rt_rq *rt_rq)
304 {
305 if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
306 if (!rt_rq->overloaded) {
307 rt_set_overload(rq_of_rt_rq(rt_rq));
308 rt_rq->overloaded = 1;
309 }
310 } else if (rt_rq->overloaded) {
311 rt_clear_overload(rq_of_rt_rq(rt_rq));
312 rt_rq->overloaded = 0;
313 }
314 }
315
316 static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
317 {
318 struct task_struct *p;
319
320 if (!rt_entity_is_task(rt_se))
321 return;
322
323 p = rt_task_of(rt_se);
324 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
325
326 rt_rq->rt_nr_total++;
327 if (p->nr_cpus_allowed > 1)
328 rt_rq->rt_nr_migratory++;
329
330 update_rt_migration(rt_rq);
331 }
332
333 static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
334 {
335 struct task_struct *p;
336
337 if (!rt_entity_is_task(rt_se))
338 return;
339
340 p = rt_task_of(rt_se);
341 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
342
343 rt_rq->rt_nr_total--;
344 if (p->nr_cpus_allowed > 1)
345 rt_rq->rt_nr_migratory--;
346
347 update_rt_migration(rt_rq);
348 }
349
350 static inline int has_pushable_tasks(struct rq *rq)
351 {
352 return !plist_head_empty(&rq->rt.pushable_tasks);
353 }
354
355 static DEFINE_PER_CPU(struct callback_head, rt_push_head);
356 static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
357
358 static void push_rt_tasks(struct rq *);
359 static void pull_rt_task(struct rq *);
360
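/*
 * Push and pull work is queued as a balance callback instead of being
 * run directly, so it executes at a point where it is safe to drop and
 * re-take the runqueue lock.
 */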
361 static inline void rt_queue_push_tasks(struct rq *rq)
362 {
363 if (!has_pushable_tasks(rq))
364 return;
365
366 queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
367 }
368
369 static inline void rt_queue_pull_task(struct rq *rq)
370 {
371 queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
372 }
373
374 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
375 {
376 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
377 plist_node_init(&p->pushable_tasks, p->prio);
378 plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
379
380
381 if (p->prio < rq->rt.highest_prio.next)
382 rq->rt.highest_prio.next = p->prio;
383 }
384
385 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
386 {
387 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
388
389
390 if (has_pushable_tasks(rq)) {
391 p = plist_first_entry(&rq->rt.pushable_tasks,
392 struct task_struct, pushable_tasks);
393 rq->rt.highest_prio.next = p->prio;
394 } else
395 rq->rt.highest_prio.next = MAX_RT_PRIO;
396 }
397
398 #else
399
400 static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
401 {
402 }
403
404 static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
405 {
406 }
407
408 static inline
409 void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
410 {
411 }
412
413 static inline
414 void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
415 {
416 }
417
418 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
419 {
420 return false;
421 }
422
423 static inline void pull_rt_task(struct rq *this_rq)
424 {
425 }
426
427 static inline void rt_queue_push_tasks(struct rq *rq)
428 {
429 }
430 #endif
431
432 static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
433 static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
434
435 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
436 {
437 return rt_se->on_rq;
438 }
439
440 #ifdef CONFIG_RT_GROUP_SCHED
441
442 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
443 {
444 if (!rt_rq->tg)
445 return RUNTIME_INF;
446
447 return rt_rq->rt_runtime;
448 }
449
450 static inline u64 sched_rt_period(struct rt_rq *rt_rq)
451 {
452 return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
453 }
454
455 typedef struct task_group *rt_rq_iter_t;
456
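/*
 * Task-group iterator used by for_each_rt_rq(): walk the global
 * task_groups list, skipping autogroups, and stop at the end of the
 * list.
 */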
457 static inline struct task_group *next_task_group(struct task_group *tg)
458 {
459 do {
460 tg = list_entry_rcu(tg->list.next,
461 typeof(struct task_group), list);
462 } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
463
464 if (&tg->list == &task_groups)
465 tg = NULL;
466
467 return tg;
468 }
469
470 #define for_each_rt_rq(rt_rq, iter, rq) \
471 for (iter = container_of(&task_groups, typeof(*iter), list); \
472 (iter = next_task_group(iter)) && \
473 (rt_rq = iter->rt_rq[cpu_of(rq)]);)
474
475 #define for_each_sched_rt_entity(rt_se) \
476 for (; rt_se; rt_se = rt_se->parent)
477
478 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
479 {
480 return rt_se->my_q;
481 }
482
483 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
484 static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
485
486 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
487 {
488 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
489 struct rq *rq = rq_of_rt_rq(rt_rq);
490 struct sched_rt_entity *rt_se;
491
492 int cpu = cpu_of(rq);
493
494 rt_se = rt_rq->tg->rt_se[cpu];
495
496 if (rt_rq->rt_nr_running) {
497 if (!rt_se)
498 enqueue_top_rt_rq(rt_rq);
499 else if (!on_rt_rq(rt_se))
500 enqueue_rt_entity(rt_se, 0);
501
502 if (rt_rq->highest_prio.curr < curr->prio)
503 resched_curr(rq);
504 }
505 }
506
507 static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
508 {
509 struct sched_rt_entity *rt_se;
510 int cpu = cpu_of(rq_of_rt_rq(rt_rq));
511
512 rt_se = rt_rq->tg->rt_se[cpu];
513
514 if (!rt_se) {
515 dequeue_top_rt_rq(rt_rq);
516
517 cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
518 }
519 else if (on_rt_rq(rt_se))
520 dequeue_rt_entity(rt_se, 0);
521 }
522
523 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
524 {
525 return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
526 }
527
528 static int rt_se_boosted(struct sched_rt_entity *rt_se)
529 {
530 struct rt_rq *rt_rq = group_rt_rq(rt_se);
531 struct task_struct *p;
532
533 if (rt_rq)
534 return !!rt_rq->rt_nr_boosted;
535
536 p = rt_task_of(rt_se);
537 return p->prio != p->normal_prio;
538 }
539
540 #ifdef CONFIG_SMP
541 static inline const struct cpumask *sched_rt_period_mask(void)
542 {
543 return this_rq()->rd->span;
544 }
545 #else
546 static inline const struct cpumask *sched_rt_period_mask(void)
547 {
548 return cpu_online_mask;
549 }
550 #endif
551
552 static inline
553 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
554 {
555 return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
556 }
557
558 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
559 {
560 return &rt_rq->tg->rt_bandwidth;
561 }
562
563 #else
564
565 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
566 {
567 return rt_rq->rt_runtime;
568 }
569
570 static inline u64 sched_rt_period(struct rt_rq *rt_rq)
571 {
572 return ktime_to_ns(def_rt_bandwidth.rt_period);
573 }
574
575 typedef struct rt_rq *rt_rq_iter_t;
576
577 #define for_each_rt_rq(rt_rq, iter, rq) \
578 for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
579
580 #define for_each_sched_rt_entity(rt_se) \
581 for (; rt_se; rt_se = NULL)
582
583 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
584 {
585 return NULL;
586 }
587
588 static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
589 {
590 struct rq *rq = rq_of_rt_rq(rt_rq);
591
592 if (!rt_rq->rt_nr_running)
593 return;
594
595 enqueue_top_rt_rq(rt_rq);
596 resched_curr(rq);
597 }
598
599 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
600 {
601 dequeue_top_rt_rq(rt_rq);
602 }
603
604 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
605 {
606 return rt_rq->rt_throttled;
607 }
608
609 static inline const struct cpumask *sched_rt_period_mask(void)
610 {
611 return cpu_online_mask;
612 }
613
614 static inline
615 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
616 {
617 return &cpu_rq(cpu)->rt;
618 }
619
620 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
621 {
622 return &def_rt_bandwidth;
623 }
624
625 #endif
626
627 bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
628 {
629 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
630
631 return (hrtimer_active(&rt_b->rt_period_timer) ||
632 rt_rq->rt_time < rt_b->rt_runtime);
633 }
634
635 #ifdef CONFIG_SMP
636
/*
 * Ran out of runtime: try to borrow spare runtime from the other CPUs
 * in this root domain.
 */
639 static void do_balance_runtime(struct rt_rq *rt_rq)
640 {
641 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
642 struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
643 int i, weight;
644 u64 rt_period;
645
646 weight = cpumask_weight(rd->span);
647
648 raw_spin_lock(&rt_b->rt_runtime_lock);
649 rt_period = ktime_to_ns(rt_b->rt_period);
650 for_each_cpu(i, rd->span) {
651 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
652 s64 diff;
653
654 if (iter == rt_rq)
655 continue;
656
657 raw_spin_lock(&iter->rt_runtime_lock);
658
659
660
661
662
663 if (iter->rt_runtime == RUNTIME_INF)
664 goto next;
665
666
667
668
669
670 diff = iter->rt_runtime - iter->rt_time;
671 if (diff > 0) {
672 diff = div_u64((u64)diff, weight);
673 if (rt_rq->rt_runtime + diff > rt_period)
674 diff = rt_period - rt_rq->rt_runtime;
675 iter->rt_runtime -= diff;
676 rt_rq->rt_runtime += diff;
677 if (rt_rq->rt_runtime == rt_period) {
678 raw_spin_unlock(&iter->rt_runtime_lock);
679 break;
680 }
681 }
682 next:
683 raw_spin_unlock(&iter->rt_runtime_lock);
684 }
685 raw_spin_unlock(&rt_b->rt_runtime_lock);
686 }
687
/*
 * The runqueue is going offline: reclaim any runtime its rt_rqs lent
 * to (or borrowed from) their neighbours, then disable runtime
 * accounting on them (RUNTIME_INF) so nothing is left throttled.
 */
691 static void __disable_runtime(struct rq *rq)
692 {
693 struct root_domain *rd = rq->rd;
694 rt_rq_iter_t iter;
695 struct rt_rq *rt_rq;
696
697 if (unlikely(!scheduler_running))
698 return;
699
700 for_each_rt_rq(rt_rq, iter, rq) {
701 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
702 s64 want;
703 int i;
704
705 raw_spin_lock(&rt_b->rt_runtime_lock);
706 raw_spin_lock(&rt_rq->rt_runtime_lock);
707
708
709
710
711
712 if (rt_rq->rt_runtime == RUNTIME_INF ||
713 rt_rq->rt_runtime == rt_b->rt_runtime)
714 goto balanced;
715 raw_spin_unlock(&rt_rq->rt_runtime_lock);
716
717
718
719
720
721
722 want = rt_b->rt_runtime - rt_rq->rt_runtime;
723
724
725
726
727 for_each_cpu(i, rd->span) {
728 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
729 s64 diff;
730
731
732
733
734 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
735 continue;
736
737 raw_spin_lock(&iter->rt_runtime_lock);
738 if (want > 0) {
739 diff = min_t(s64, iter->rt_runtime, want);
740 iter->rt_runtime -= diff;
741 want -= diff;
742 } else {
743 iter->rt_runtime -= want;
744 want -= want;
745 }
746 raw_spin_unlock(&iter->rt_runtime_lock);
747
748 if (!want)
749 break;
750 }
751
752 raw_spin_lock(&rt_rq->rt_runtime_lock);
753
754
755
756
757 BUG_ON(want);
758 balanced:
759
760
761
762
763 rt_rq->rt_runtime = RUNTIME_INF;
764 rt_rq->rt_throttled = 0;
765 raw_spin_unlock(&rt_rq->rt_runtime_lock);
766 raw_spin_unlock(&rt_b->rt_runtime_lock);
767
768
769 sched_rt_rq_enqueue(rt_rq);
770 }
771 }
772
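/*
 * A runqueue is coming back online: give each of its rt_rqs the full
 * per-group runtime again and clear any stale throttling state.
 */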
773 static void __enable_runtime(struct rq *rq)
774 {
775 rt_rq_iter_t iter;
776 struct rt_rq *rt_rq;
777
778 if (unlikely(!scheduler_running))
779 return;
780
781
782
783
784 for_each_rt_rq(rt_rq, iter, rq) {
785 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
786
787 raw_spin_lock(&rt_b->rt_runtime_lock);
788 raw_spin_lock(&rt_rq->rt_runtime_lock);
789 rt_rq->rt_runtime = rt_b->rt_runtime;
790 rt_rq->rt_time = 0;
791 rt_rq->rt_throttled = 0;
792 raw_spin_unlock(&rt_rq->rt_runtime_lock);
793 raw_spin_unlock(&rt_b->rt_runtime_lock);
794 }
795 }
796
797 static void balance_runtime(struct rt_rq *rt_rq)
798 {
799 if (!sched_feat(RT_RUNTIME_SHARE))
800 return;
801
802 if (rt_rq->rt_time > rt_rq->rt_runtime) {
803 raw_spin_unlock(&rt_rq->rt_runtime_lock);
804 do_balance_runtime(rt_rq);
805 raw_spin_lock(&rt_rq->rt_runtime_lock);
806 }
807 }
808 #else
809 static inline void balance_runtime(struct rt_rq *rt_rq) {}
810 #endif
811
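/*
 * Called from the bandwidth period timer: for every rt_rq served by
 * this pool, replenish runtime (rebalancing it first if throttled),
 * unthrottle and re-enqueue runqueues whose accrued rt_time dropped
 * back below their runtime, and report whether everything is idle.
 */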
812 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
813 {
814 int i, idle = 1, throttled = 0;
815 const struct cpumask *span;
816
817 span = sched_rt_period_mask();
818 #ifdef CONFIG_RT_GROUP_SCHED
/*
 * The root task group's period timer services every online CPU, not
 * just this root domain's span, so rt_rqs on isolated CPUs still get
 * replenished and unthrottled.
 */
828 if (rt_b == &root_task_group.rt_bandwidth)
829 span = cpu_online_mask;
830 #endif
831 for_each_cpu(i, span) {
832 int enqueue = 0;
833 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
834 struct rq *rq = rq_of_rt_rq(rt_rq);
835 int skip;
836
837
838
839
840
841 raw_spin_lock(&rt_rq->rt_runtime_lock);
842 if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
843 rt_rq->rt_runtime = rt_b->rt_runtime;
844 skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
845 raw_spin_unlock(&rt_rq->rt_runtime_lock);
846 if (skip)
847 continue;
848
849 raw_spin_lock(&rq->lock);
850 update_rq_clock(rq);
851
852 if (rt_rq->rt_time) {
853 u64 runtime;
854
855 raw_spin_lock(&rt_rq->rt_runtime_lock);
856 if (rt_rq->rt_throttled)
857 balance_runtime(rt_rq);
858 runtime = rt_rq->rt_runtime;
859 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
860 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
861 rt_rq->rt_throttled = 0;
862 enqueue = 1;
863
864
865
866
867
868
869
870
871 if (rt_rq->rt_nr_running && rq->curr == rq->idle)
872 rq_clock_cancel_skipupdate(rq);
873 }
874 if (rt_rq->rt_time || rt_rq->rt_nr_running)
875 idle = 0;
876 raw_spin_unlock(&rt_rq->rt_runtime_lock);
877 } else if (rt_rq->rt_nr_running) {
878 idle = 0;
879 if (!rt_rq_throttled(rt_rq))
880 enqueue = 1;
881 }
882 if (rt_rq->rt_throttled)
883 throttled = 1;
884
885 if (enqueue)
886 sched_rt_rq_enqueue(rt_rq);
887 raw_spin_unlock(&rq->lock);
888 }
889
890 if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
891 return 1;
892
893 return idle;
894 }
895
896 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
897 {
898 #ifdef CONFIG_RT_GROUP_SCHED
899 struct rt_rq *rt_rq = group_rt_rq(rt_se);
900
901 if (rt_rq)
902 return rt_rq->highest_prio.curr;
903 #endif
904
905 return rt_task_of(rt_se)->prio;
906 }
907
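/*
 * Check whether this rt_rq has consumed more than its allowed runtime
 * in the current period; if so, mark it throttled, dequeue it and
 * return 1 so the caller can reschedule.
 */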
908 static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
909 {
910 u64 runtime = sched_rt_runtime(rt_rq);
911
912 if (rt_rq->rt_throttled)
913 return rt_rq_throttled(rt_rq);
914
915 if (runtime >= sched_rt_period(rt_rq))
916 return 0;
917
918 balance_runtime(rt_rq);
919 runtime = sched_rt_runtime(rt_rq);
920 if (runtime == RUNTIME_INF)
921 return 0;
922
923 if (rt_rq->rt_time > runtime) {
924 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
925
926
927
928
929
930 if (likely(rt_b->rt_runtime)) {
931 rt_rq->rt_throttled = 1;
932 printk_deferred_once("sched: RT throttling activated\n");
933 } else {
934
935
936
937
938
939 rt_rq->rt_time = 0;
940 }
941
942 if (rt_rq_throttled(rt_rq)) {
943 sched_rt_rq_dequeue(rt_rq);
944 return 1;
945 }
946 }
947
948 return 0;
949 }
950
/*
 * Update the current task's runtime statistics and charge the elapsed
 * time to every rt_rq in its hierarchy; tasks of other scheduling
 * classes are skipped.
 */
955 static void update_curr_rt(struct rq *rq)
956 {
957 struct task_struct *curr = rq->curr;
958 struct sched_rt_entity *rt_se = &curr->rt;
959 u64 delta_exec;
960 u64 now;
961
962 if (curr->sched_class != &rt_sched_class)
963 return;
964
965 now = rq_clock_task(rq);
966 delta_exec = now - curr->se.exec_start;
967 if (unlikely((s64)delta_exec <= 0))
968 return;
969
970 schedstat_set(curr->se.statistics.exec_max,
971 max(curr->se.statistics.exec_max, delta_exec));
972
973 curr->se.sum_exec_runtime += delta_exec;
974 account_group_exec_runtime(curr, delta_exec);
975
976 curr->se.exec_start = now;
977 cgroup_account_cputime(curr, delta_exec);
978
979 if (!rt_bandwidth_enabled())
980 return;
981
982 for_each_sched_rt_entity(rt_se) {
983 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
984
985 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
986 raw_spin_lock(&rt_rq->rt_runtime_lock);
987 rt_rq->rt_time += delta_exec;
988 if (sched_rt_runtime_exceeded(rt_rq))
989 resched_curr(rq);
990 raw_spin_unlock(&rt_rq->rt_runtime_lock);
991 }
992 }
993 }
994
995 static void
996 dequeue_top_rt_rq(struct rt_rq *rt_rq)
997 {
998 struct rq *rq = rq_of_rt_rq(rt_rq);
999
1000 BUG_ON(&rq->rt != rt_rq);
1001
1002 if (!rt_rq->rt_queued)
1003 return;
1004
1005 BUG_ON(!rq->nr_running);
1006
1007 sub_nr_running(rq, rt_rq->rt_nr_running);
1008 rt_rq->rt_queued = 0;
1009
1010 }
1011
1012 static void
1013 enqueue_top_rt_rq(struct rt_rq *rt_rq)
1014 {
1015 struct rq *rq = rq_of_rt_rq(rt_rq);
1016
1017 BUG_ON(&rq->rt != rt_rq);
1018
1019 if (rt_rq->rt_queued)
1020 return;
1021
1022 if (rt_rq_throttled(rt_rq))
1023 return;
1024
1025 if (rt_rq->rt_nr_running) {
1026 add_nr_running(rq, rt_rq->rt_nr_running);
1027 rt_rq->rt_queued = 1;
1028 }
1029
1030
1031 cpufreq_update_util(rq, 0);
1032 }
1033
1034 #if defined CONFIG_SMP
1035
1036 static void
1037 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
1038 {
1039 struct rq *rq = rq_of_rt_rq(rt_rq);
1040
1041 #ifdef CONFIG_RT_GROUP_SCHED
1042
1043
1044
1045 if (&rq->rt != rt_rq)
1046 return;
1047 #endif
1048 if (rq->online && prio < prev_prio)
1049 cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
1050 }
1051
1052 static void
1053 dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
1054 {
1055 struct rq *rq = rq_of_rt_rq(rt_rq);
1056
1057 #ifdef CONFIG_RT_GROUP_SCHED
1058
1059
1060
1061 if (&rq->rt != rt_rq)
1062 return;
1063 #endif
1064 if (rq->online && rt_rq->highest_prio.curr != prev_prio)
1065 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
1066 }
1067
1068 #else
1069
1070 static inline
1071 void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1072 static inline
1073 void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1074
1075 #endif
1076
1077 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
1078 static void
1079 inc_rt_prio(struct rt_rq *rt_rq, int prio)
1080 {
1081 int prev_prio = rt_rq->highest_prio.curr;
1082
1083 if (prio < prev_prio)
1084 rt_rq->highest_prio.curr = prio;
1085
1086 inc_rt_prio_smp(rt_rq, prio, prev_prio);
1087 }
1088
1089 static void
1090 dec_rt_prio(struct rt_rq *rt_rq, int prio)
1091 {
1092 int prev_prio = rt_rq->highest_prio.curr;
1093
1094 if (rt_rq->rt_nr_running) {
1095
1096 WARN_ON(prio < prev_prio);
1097
1098
1099
1100
1101
1102 if (prio == prev_prio) {
1103 struct rt_prio_array *array = &rt_rq->active;
1104
1105 rt_rq->highest_prio.curr =
1106 sched_find_first_bit(array->bitmap);
1107 }
1108
1109 } else
1110 rt_rq->highest_prio.curr = MAX_RT_PRIO;
1111
1112 dec_rt_prio_smp(rt_rq, prio, prev_prio);
1113 }
1114
1115 #else
1116
1117 static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
1118 static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
1119
1120 #endif
1121
1122 #ifdef CONFIG_RT_GROUP_SCHED
1123
1124 static void
1125 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1126 {
1127 if (rt_se_boosted(rt_se))
1128 rt_rq->rt_nr_boosted++;
1129
1130 if (rt_rq->tg)
1131 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
1132 }
1133
1134 static void
1135 dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1136 {
1137 if (rt_se_boosted(rt_se))
1138 rt_rq->rt_nr_boosted--;
1139
1140 WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
1141 }
1142
1143 #else
1144
1145 static void
1146 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1147 {
1148 start_rt_bandwidth(&def_rt_bandwidth);
1149 }
1150
1151 static inline
1152 void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
1153
1154 #endif
1155
1156 static inline
1157 unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
1158 {
1159 struct rt_rq *group_rq = group_rt_rq(rt_se);
1160
1161 if (group_rq)
1162 return group_rq->rt_nr_running;
1163 else
1164 return 1;
1165 }
1166
1167 static inline
1168 unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
1169 {
1170 struct rt_rq *group_rq = group_rt_rq(rt_se);
1171 struct task_struct *tsk;
1172
1173 if (group_rq)
1174 return group_rq->rr_nr_running;
1175
1176 tsk = rt_task_of(rt_se);
1177
1178 return (tsk->policy == SCHED_RR) ? 1 : 0;
1179 }
1180
1181 static inline
1182 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1183 {
1184 int prio = rt_se_prio(rt_se);
1185
1186 WARN_ON(!rt_prio(prio));
1187 rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
1188 rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
1189
1190 inc_rt_prio(rt_rq, prio);
1191 inc_rt_migration(rt_se, rt_rq);
1192 inc_rt_group(rt_se, rt_rq);
1193 }
1194
1195 static inline
1196 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1197 {
1198 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
1199 WARN_ON(!rt_rq->rt_nr_running);
1200 rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
1201 rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
1202
1203 dec_rt_prio(rt_rq, rt_se_prio(rt_se));
1204 dec_rt_migration(rt_se, rt_rq);
1205 dec_rt_group(rt_se, rt_rq);
1206 }
1207
/*
 * Only move rt_se on/off its run list when this is a real (de)queue:
 * a DEQUEUE_SAVE without DEQUEUE_MOVE leaves the list untouched
 * (enqueue and dequeue flags are assumed to match).
 */
1213 static inline bool move_entity(unsigned int flags)
1214 {
1215 if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
1216 return false;
1217
1218 return true;
1219 }
1220
1221 static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
1222 {
1223 list_del_init(&rt_se->run_list);
1224
1225 if (list_empty(array->queue + rt_se_prio(rt_se)))
1226 __clear_bit(rt_se_prio(rt_se), array->bitmap);
1227
1228 rt_se->on_list = 0;
1229 }
1230
1231 static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1232 {
1233 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1234 struct rt_prio_array *array = &rt_rq->active;
1235 struct rt_rq *group_rq = group_rt_rq(rt_se);
1236 struct list_head *queue = array->queue + rt_se_prio(rt_se);
1237
/*
 * Don't enqueue a group entity whose group runqueue is throttled or
 * has nothing runnable; just make sure it is off the list in that
 * case.
 */
1244 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
1245 if (rt_se->on_list)
1246 __delist_rt_entity(rt_se, array);
1247 return;
1248 }
1249
1250 if (move_entity(flags)) {
1251 WARN_ON_ONCE(rt_se->on_list);
1252 if (flags & ENQUEUE_HEAD)
1253 list_add(&rt_se->run_list, queue);
1254 else
1255 list_add_tail(&rt_se->run_list, queue);
1256
1257 __set_bit(rt_se_prio(rt_se), array->bitmap);
1258 rt_se->on_list = 1;
1259 }
1260 rt_se->on_rq = 1;
1261
1262 inc_rt_tasks(rt_se, rt_rq);
1263 }
1264
1265 static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1266 {
1267 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1268 struct rt_prio_array *array = &rt_rq->active;
1269
1270 if (move_entity(flags)) {
1271 WARN_ON_ONCE(!rt_se->on_list);
1272 __delist_rt_entity(rt_se, array);
1273 }
1274 rt_se->on_rq = 0;
1275
1276 dec_rt_tasks(rt_se, rt_rq);
1277 }
1278
1279
1280
1281
1282
1283 static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
1284 {
1285 struct sched_rt_entity *back = NULL;
1286
1287 for_each_sched_rt_entity(rt_se) {
1288 rt_se->back = back;
1289 back = rt_se;
1290 }
1291
1292 dequeue_top_rt_rq(rt_rq_of_se(back));
1293
1294 for (rt_se = back; rt_se; rt_se = rt_se->back) {
1295 if (on_rt_rq(rt_se))
1296 __dequeue_rt_entity(rt_se, flags);
1297 }
1298 }
1299
1300 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1301 {
1302 struct rq *rq = rq_of_rt_se(rt_se);
1303
1304 dequeue_rt_stack(rt_se, flags);
1305 for_each_sched_rt_entity(rt_se)
1306 __enqueue_rt_entity(rt_se, flags);
1307 enqueue_top_rt_rq(&rq->rt);
1308 }
1309
1310 static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1311 {
1312 struct rq *rq = rq_of_rt_se(rt_se);
1313
1314 dequeue_rt_stack(rt_se, flags);
1315
1316 for_each_sched_rt_entity(rt_se) {
1317 struct rt_rq *rt_rq = group_rt_rq(rt_se);
1318
1319 if (rt_rq && rt_rq->rt_nr_running)
1320 __enqueue_rt_entity(rt_se, flags);
1321 }
1322 enqueue_top_rt_rq(&rq->rt);
1323 }
1324
1325
1326
1327
1328 static void
1329 enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1330 {
1331 struct sched_rt_entity *rt_se = &p->rt;
1332
1333 if (flags & ENQUEUE_WAKEUP)
1334 rt_se->timeout = 0;
1335
1336 enqueue_rt_entity(rt_se, flags);
1337
1338 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1339 enqueue_pushable_task(rq, p);
1340 }
1341
1342 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1343 {
1344 struct sched_rt_entity *rt_se = &p->rt;
1345
1346 update_curr_rt(rq);
1347 dequeue_rt_entity(rt_se, flags);
1348
1349 dequeue_pushable_task(rq, p);
1350 }
1351
1352
1353
1354
1355
1356 static void
1357 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
1358 {
1359 if (on_rt_rq(rt_se)) {
1360 struct rt_prio_array *array = &rt_rq->active;
1361 struct list_head *queue = array->queue + rt_se_prio(rt_se);
1362
1363 if (head)
1364 list_move(&rt_se->run_list, queue);
1365 else
1366 list_move_tail(&rt_se->run_list, queue);
1367 }
1368 }
1369
1370 static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
1371 {
1372 struct sched_rt_entity *rt_se = &p->rt;
1373 struct rt_rq *rt_rq;
1374
1375 for_each_sched_rt_entity(rt_se) {
1376 rt_rq = rt_rq_of_se(rt_se);
1377 requeue_rt_entity(rt_rq, rt_se, head);
1378 }
1379 }
1380
1381 static void yield_task_rt(struct rq *rq)
1382 {
1383 requeue_task_rt(rq, rq->curr, 0);
1384 }
1385
1386 #ifdef CONFIG_SMP
1387 static int find_lowest_rq(struct task_struct *task);
1388
1389 static int
1390 select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
1391 {
1392 struct task_struct *curr;
1393 struct rq *rq;
1394
1395
1396 if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
1397 goto out;
1398
1399 rq = cpu_rq(cpu);
1400
1401 rcu_read_lock();
1402 curr = READ_ONCE(rq->curr);
1403
/*
 * If the target CPU is currently running an RT task that either cannot
 * migrate or has equal or higher priority than the waking task, look
 * for a CPU running at lower priority instead, so RT tasks are not
 * needlessly stacked on one runqueue.  Only switch if the candidate
 * CPU would let @p run as its highest-priority task.
 */
1426 if (curr && unlikely(rt_task(curr)) &&
1427 (curr->nr_cpus_allowed < 2 ||
1428 curr->prio <= p->prio)) {
1429 int target = find_lowest_rq(p);
1430
1431
1432
1433
1434
1435 if (target != -1 &&
1436 p->prio < cpu_rq(target)->rt.highest_prio.curr)
1437 cpu = target;
1438 }
1439 rcu_read_unlock();
1440
1441 out:
1442 return cpu;
1443 }
1444
1445 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1446 {
1447
1448
1449
1450
1451 if (rq->curr->nr_cpus_allowed == 1 ||
1452 !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
1453 return;
1454
1455
1456
1457
1458
1459 if (p->nr_cpus_allowed != 1
1460 && cpupri_find(&rq->rd->cpupri, p, NULL))
1461 return;
1462
1463
1464
1465
1466
1467
1468 requeue_task_rt(rq, p, 1);
1469 resched_curr(rq);
1470 }
1471
1472 static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1473 {
1474 if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
1475
1476
1477
1478
1479
1480
1481 rq_unpin_lock(rq, rf);
1482 pull_rt_task(rq);
1483 rq_repin_lock(rq, rf);
1484 }
1485
1486 return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
1487 }
1488 #endif
1489
1490
1491
1492
1493 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
1494 {
1495 if (p->prio < rq->curr->prio) {
1496 resched_curr(rq);
1497 return;
1498 }
1499
1500 #ifdef CONFIG_SMP
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513 if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
1514 check_preempt_equal_prio(rq, p);
1515 #endif
1516 }
1517
1518 static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
1519 {
1520 p->se.exec_start = rq_clock_task(rq);
1521
1522
1523 dequeue_pushable_task(rq, p);
1524
1525 if (!first)
1526 return;
1527
1528
1529
1530
1531
1532
1533 if (rq->curr->sched_class != &rt_sched_class)
1534 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
1535
1536 rt_queue_push_tasks(rq);
1537 }
1538
1539 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
1540 struct rt_rq *rt_rq)
1541 {
1542 struct rt_prio_array *array = &rt_rq->active;
1543 struct sched_rt_entity *next = NULL;
1544 struct list_head *queue;
1545 int idx;
1546
1547 idx = sched_find_first_bit(array->bitmap);
1548 BUG_ON(idx >= MAX_RT_PRIO);
1549
1550 queue = array->queue + idx;
1551 next = list_entry(queue->next, struct sched_rt_entity, run_list);
1552
1553 return next;
1554 }
1555
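/*
 * Descend the group hierarchy, picking the highest-priority entity at
 * each level, until a task-level entity is found.
 */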
1556 static struct task_struct *_pick_next_task_rt(struct rq *rq)
1557 {
1558 struct sched_rt_entity *rt_se;
1559 struct rt_rq *rt_rq = &rq->rt;
1560
1561 do {
1562 rt_se = pick_next_rt_entity(rq, rt_rq);
1563 BUG_ON(!rt_se);
1564 rt_rq = group_rt_rq(rt_se);
1565 } while (rt_rq);
1566
1567 return rt_task_of(rt_se);
1568 }
1569
1570 static struct task_struct *
1571 pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
1572 {
1573 struct task_struct *p;
1574
1575 WARN_ON_ONCE(prev || rf);
1576
1577 if (!sched_rt_runnable(rq))
1578 return NULL;
1579
1580 p = _pick_next_task_rt(rq);
1581 set_next_task_rt(rq, p, true);
1582 return p;
1583 }
1584
1585 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1586 {
1587 update_curr_rt(rq);
1588
1589 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
1590
1591
1592
1593
1594
1595 if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
1596 enqueue_pushable_task(rq, p);
1597 }
1598
1599 #ifdef CONFIG_SMP
1600
1601
1602 #define RT_MAX_TRIES 3
1603
1604 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1605 {
1606 if (!task_running(rq, p) &&
1607 cpumask_test_cpu(cpu, p->cpus_ptr))
1608 return 1;
1609
1610 return 0;
1611 }
1612
1613
1614
1615
1616
1617 static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
1618 {
1619 struct plist_head *head = &rq->rt.pushable_tasks;
1620 struct task_struct *p;
1621
1622 if (!has_pushable_tasks(rq))
1623 return NULL;
1624
1625 plist_for_each_entry(p, head, pushable_tasks) {
1626 if (pick_rt_task(rq, p, cpu))
1627 return p;
1628 }
1629
1630 return NULL;
1631 }
1632
1633 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
1634
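/*
 * Find a CPU where @task could run with the least competition: prefer
 * the task's current CPU if it is in the lowest-priority mask reported
 * by cpupri, then a cache-affine CPU (same SD_WAKE_AFFINE domain),
 * then any CPU in the mask; return -1 if nothing suitable is found.
 */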
1635 static int find_lowest_rq(struct task_struct *task)
1636 {
1637 struct sched_domain *sd;
1638 struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
1639 int this_cpu = smp_processor_id();
1640 int cpu = task_cpu(task);
1641
1642
1643 if (unlikely(!lowest_mask))
1644 return -1;
1645
1646 if (task->nr_cpus_allowed == 1)
1647 return -1;
1648
1649 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
1650 return -1;
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660 if (cpumask_test_cpu(cpu, lowest_mask))
1661 return cpu;
1662
1663
1664
1665
1666
1667 if (!cpumask_test_cpu(this_cpu, lowest_mask))
1668 this_cpu = -1;
1669
1670 rcu_read_lock();
1671 for_each_domain(cpu, sd) {
1672 if (sd->flags & SD_WAKE_AFFINE) {
1673 int best_cpu;
1674
1675
1676
1677
1678
1679 if (this_cpu != -1 &&
1680 cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
1681 rcu_read_unlock();
1682 return this_cpu;
1683 }
1684
1685 best_cpu = cpumask_first_and(lowest_mask,
1686 sched_domain_span(sd));
1687 if (best_cpu < nr_cpu_ids) {
1688 rcu_read_unlock();
1689 return best_cpu;
1690 }
1691 }
1692 }
1693 rcu_read_unlock();
1694
1695
1696
1697
1698
1699
1700 if (this_cpu != -1)
1701 return this_cpu;
1702
1703 cpu = cpumask_any(lowest_mask);
1704 if (cpu < nr_cpu_ids)
1705 return cpu;
1706
1707 return -1;
1708 }
1709
/* Find a lower-priority runqueue for @task and lock it; may drop and retake rq->lock. */
1711 static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1712 {
1713 struct rq *lowest_rq = NULL;
1714 int tries;
1715 int cpu;
1716
1717 for (tries = 0; tries < RT_MAX_TRIES; tries++) {
1718 cpu = find_lowest_rq(task);
1719
1720 if ((cpu == -1) || (cpu == rq->cpu))
1721 break;
1722
1723 lowest_rq = cpu_rq(cpu);
1724
1725 if (lowest_rq->rt.highest_prio.curr <= task->prio) {
1726
1727
1728
1729
1730
1731 lowest_rq = NULL;
1732 break;
1733 }
1734
1735
1736 if (double_lock_balance(rq, lowest_rq)) {
1737
1738
1739
1740
1741
1742
1743 if (unlikely(task_rq(task) != rq ||
1744 !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
1745 task_running(rq, task) ||
1746 !rt_task(task) ||
1747 !task_on_rq_queued(task))) {
1748
1749 double_unlock_balance(rq, lowest_rq);
1750 lowest_rq = NULL;
1751 break;
1752 }
1753 }
1754
1755
1756 if (lowest_rq->rt.highest_prio.curr > task->prio)
1757 break;
1758
1759
1760 double_unlock_balance(rq, lowest_rq);
1761 lowest_rq = NULL;
1762 }
1763
1764 return lowest_rq;
1765 }
1766
1767 static struct task_struct *pick_next_pushable_task(struct rq *rq)
1768 {
1769 struct task_struct *p;
1770
1771 if (!has_pushable_tasks(rq))
1772 return NULL;
1773
1774 p = plist_first_entry(&rq->rt.pushable_tasks,
1775 struct task_struct, pushable_tasks);
1776
1777 BUG_ON(rq->cpu != task_cpu(p));
1778 BUG_ON(task_current(rq, p));
1779 BUG_ON(p->nr_cpus_allowed <= 1);
1780
1781 BUG_ON(!task_on_rq_queued(p));
1782 BUG_ON(!rt_task(p));
1783
1784 return p;
1785 }
1786
/*
 * If this runqueue has more than one RT task, try to migrate one of
 * the queued (not currently running) ones to a CPU running at lower
 * priority.
 */
1792 static int push_rt_task(struct rq *rq)
1793 {
1794 struct task_struct *next_task;
1795 struct rq *lowest_rq;
1796 int ret = 0;
1797
1798 if (!rq->rt.overloaded)
1799 return 0;
1800
1801 next_task = pick_next_pushable_task(rq);
1802 if (!next_task)
1803 return 0;
1804
1805 retry:
1806 if (WARN_ON(next_task == rq->curr))
1807 return 0;
1808
1809
1810
1811
1812
1813
1814 if (unlikely(next_task->prio < rq->curr->prio)) {
1815 resched_curr(rq);
1816 return 0;
1817 }
1818
1819
1820 get_task_struct(next_task);
1821
1822
1823 lowest_rq = find_lock_lowest_rq(next_task, rq);
1824 if (!lowest_rq) {
1825 struct task_struct *task;
1826
1827
1828
1829
1830
1831
1832
1833
1834 task = pick_next_pushable_task(rq);
1835 if (task == next_task) {
1836
1837
1838
1839
1840
1841
1842 goto out;
1843 }
1844
1845 if (!task)
1846
1847 goto out;
1848
1849
1850
1851
1852 put_task_struct(next_task);
1853 next_task = task;
1854 goto retry;
1855 }
1856
1857 deactivate_task(rq, next_task, 0);
1858 set_task_cpu(next_task, lowest_rq->cpu);
1859 activate_task(lowest_rq, next_task, 0);
1860 ret = 1;
1861
1862 resched_curr(lowest_rq);
1863
1864 double_unlock_balance(rq, lowest_rq);
1865
1866 out:
1867 put_task_struct(next_task);
1868
1869 return ret;
1870 }
1871
1872 static void push_rt_tasks(struct rq *rq)
1873 {
1874
1875 while (push_rt_task(rq))
1876 ;
1877 }
1878
1879 #ifdef HAVE_RT_PUSH_IPI
/*
 * HAVE_RT_PUSH_IPI: instead of having every CPU that lowers its
 * priority pull from all overloaded runqueues (contending on their
 * locks), send an IPI around the CPUs in the root domain's rto_mask.
 * Each recipient pushes its own pushable tasks and then forwards the
 * IPI to the next overloaded CPU; the sweep restarts if new overloads
 * appeared meanwhile (tracked via rto_loop / rto_loop_next).
 */
1922 static int rto_next_cpu(struct root_domain *rd)
1923 {
1924 int next;
1925 int cpu;
1926
/*
 * Advance rto_cpu to the next CPU in rto_mask.  When the end of the
 * mask is reached, check whether rto_loop_next changed (a new overload
 * appeared while looping); if so, start another pass, otherwise stop
 * and return -1.
 */
1940 for (;;) {
1941
1942
1943 cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
1944
1945 rd->rto_cpu = cpu;
1946
1947 if (cpu < nr_cpu_ids)
1948 return cpu;
1949
1950 rd->rto_cpu = -1;
1951
1952
1953
1954
1955
1956
1957
1958 next = atomic_read_acquire(&rd->rto_loop_next);
1959
1960 if (rd->rto_loop == next)
1961 break;
1962
1963 rd->rto_loop = next;
1964 }
1965
1966 return -1;
1967 }
1968
1969 static inline bool rto_start_trylock(atomic_t *v)
1970 {
1971 return !atomic_cmpxchg_acquire(v, 0, 1);
1972 }
1973
1974 static inline void rto_start_unlock(atomic_t *v)
1975 {
1976 atomic_set_release(v, 0);
1977 }
1978
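/*
 * Start (or fold into) an IPI sweep over the overloaded CPUs: bump
 * rto_loop_next so an in-progress sweep picks up the new state, and if
 * no sweep is running, queue the push irq_work on the first overloaded
 * CPU.
 */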
1979 static void tell_cpu_to_push(struct rq *rq)
1980 {
1981 int cpu = -1;
1982
1983
1984 atomic_inc(&rq->rd->rto_loop_next);
1985
1986
1987 if (!rto_start_trylock(&rq->rd->rto_loop_start))
1988 return;
1989
1990 raw_spin_lock(&rq->rd->rto_lock);
1991
1992
1993
1994
1995
1996
1997
1998 if (rq->rd->rto_cpu < 0)
1999 cpu = rto_next_cpu(rq->rd);
2000
2001 raw_spin_unlock(&rq->rd->rto_lock);
2002
2003 rto_start_unlock(&rq->rd->rto_loop_start);
2004
2005 if (cpu >= 0) {
2006
2007 sched_get_rd(rq->rd);
2008 irq_work_queue_on(&rq->rd->rto_push_work, cpu);
2009 }
2010 }
2011
2012
2013 void rto_push_irq_work_func(struct irq_work *work)
2014 {
2015 struct root_domain *rd =
2016 container_of(work, struct root_domain, rto_push_work);
2017 struct rq *rq;
2018 int cpu;
2019
2020 rq = this_rq();
2021
2022
2023
2024
2025
2026 if (has_pushable_tasks(rq)) {
2027 raw_spin_lock(&rq->lock);
2028 push_rt_tasks(rq);
2029 raw_spin_unlock(&rq->lock);
2030 }
2031
2032 raw_spin_lock(&rd->rto_lock);
2033
2034
2035 cpu = rto_next_cpu(rd);
2036
2037 raw_spin_unlock(&rd->rto_lock);
2038
2039 if (cpu < 0) {
2040 sched_put_rd(rd);
2041 return;
2042 }
2043
2044
2045 irq_work_queue_on(&rd->rto_push_work, cpu);
2046 }
2047 #endif
2048
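/*
 * Try to pull a higher-priority RT task from an overloaded runqueue
 * onto this one; with RT_PUSH_IPI, ask the overloaded CPUs to push
 * instead of pulling under their locks ourselves.
 */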
2049 static void pull_rt_task(struct rq *this_rq)
2050 {
2051 int this_cpu = this_rq->cpu, cpu;
2052 bool resched = false;
2053 struct task_struct *p;
2054 struct rq *src_rq;
2055 int rt_overload_count = rt_overloaded(this_rq);
2056
2057 if (likely(!rt_overload_count))
2058 return;
2059
/*
 * Paired with the smp_wmb() in rt_set_overload(): having observed a
 * non-zero rto_count, make sure we also observe the corresponding
 * rto_mask bits.
 */
2064 smp_rmb();
2065
2066
2067 if (rt_overload_count == 1 &&
2068 cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
2069 return;
2070
2071 #ifdef HAVE_RT_PUSH_IPI
2072 if (sched_feat(RT_PUSH_IPI)) {
2073 tell_cpu_to_push(this_rq);
2074 return;
2075 }
2076 #endif
2077
2078 for_each_cpu(cpu, this_rq->rd->rto_mask) {
2079 if (this_cpu == cpu)
2080 continue;
2081
2082 src_rq = cpu_rq(cpu);
2083
/*
 * Skip this source runqueue, without taking its lock, if its
 * next-highest pushable task would not beat the highest priority
 * already queued on this runqueue.
 */
2091 if (src_rq->rt.highest_prio.next >=
2092 this_rq->rt.highest_prio.curr)
2093 continue;
2094
2095
2096
2097
2098
2099
2100 double_lock_balance(this_rq, src_rq);
2101
2102
2103
2104
2105
2106 p = pick_highest_pushable_task(src_rq, this_cpu);
2107
2108
2109
2110
2111
2112 if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
2113 WARN_ON(p == src_rq->curr);
2114 WARN_ON(!task_on_rq_queued(p));
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124 if (p->prio < src_rq->curr->prio)
2125 goto skip;
2126
2127 resched = true;
2128
2129 deactivate_task(src_rq, p, 0);
2130 set_task_cpu(p, this_cpu);
2131 activate_task(this_rq, p, 0);
2132
2133
2134
2135
2136
2137
2138 }
2139 skip:
2140 double_unlock_balance(this_rq, src_rq);
2141 }
2142
2143 if (resched)
2144 resched_curr(this_rq);
2145 }
2146
/*
 * A task was just woken on this runqueue but will not run here soon:
 * if it can migrate and the currently running DL/RT task is unlikely
 * to be preempted by it, try to push it to another CPU right away.
 */
2151 static void task_woken_rt(struct rq *rq, struct task_struct *p)
2152 {
2153 if (!task_running(rq, p) &&
2154 !test_tsk_need_resched(rq->curr) &&
2155 p->nr_cpus_allowed > 1 &&
2156 (dl_task(rq->curr) || rt_task(rq->curr)) &&
2157 (rq->curr->nr_cpus_allowed < 2 ||
2158 rq->curr->prio <= p->prio))
2159 push_rt_tasks(rq);
2160 }
2161
2162
2163 static void rq_online_rt(struct rq *rq)
2164 {
2165 if (rq->rt.overloaded)
2166 rt_set_overload(rq);
2167
2168 __enable_runtime(rq);
2169
2170 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
2171 }
2172
2173
2174 static void rq_offline_rt(struct rq *rq)
2175 {
2176 if (rq->rt.overloaded)
2177 rt_clear_overload(rq);
2178
2179 __disable_runtime(rq);
2180
2181 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
2182 }
2183
2184
2185
2186
2187
2188 static void switched_from_rt(struct rq *rq, struct task_struct *p)
2189 {
2190
2191
2192
2193
2194
2195
2196
2197 if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
2198 return;
2199
2200 rt_queue_pull_task(rq);
2201 }
2202
2203 void __init init_sched_rt_class(void)
2204 {
2205 unsigned int i;
2206
2207 for_each_possible_cpu(i) {
2208 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
2209 GFP_KERNEL, cpu_to_node(i));
2210 }
2211 }
2212 #endif
2213
2214
2215
2216
2217
2218
2219 static void switched_to_rt(struct rq *rq, struct task_struct *p)
2220 {
2221
2222
2223
2224
2225
2226
2227
2228 if (task_on_rq_queued(p) && rq->curr != p) {
2229 #ifdef CONFIG_SMP
2230 if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
2231 rt_queue_push_tasks(rq);
2232 #endif
2233 if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
2234 resched_curr(rq);
2235 }
2236 }
2237
2238
2239
2240
2241
2242 static void
2243 prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
2244 {
2245 if (!task_on_rq_queued(p))
2246 return;
2247
2248 if (rq->curr == p) {
2249 #ifdef CONFIG_SMP
2250
2251
2252
2253
2254 if (oldprio < p->prio)
2255 rt_queue_pull_task(rq);
2256
2257
2258
2259
2260
2261 if (p->prio > rq->rt.highest_prio.curr)
2262 resched_curr(rq);
2263 #else
2264
2265 if (oldprio < p->prio)
2266 resched_curr(rq);
2267 #endif
2268 } else {
2269
2270
2271
2272
2273
2274 if (p->prio < rq->curr->prio)
2275 resched_curr(rq);
2276 }
2277 }
2278
2279 #ifdef CONFIG_POSIX_TIMERS
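/*
 * RLIMIT_RTTIME watchdog: count how long the task has been running
 * without sleeping and, once that exceeds the soft limit, trigger the
 * POSIX CPU-timer based watchdog.
 */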
2280 static void watchdog(struct rq *rq, struct task_struct *p)
2281 {
2282 unsigned long soft, hard;
2283
2284
2285 soft = task_rlimit(p, RLIMIT_RTTIME);
2286 hard = task_rlimit_max(p, RLIMIT_RTTIME);
2287
2288 if (soft != RLIM_INFINITY) {
2289 unsigned long next;
2290
2291 if (p->rt.watchdog_stamp != jiffies) {
2292 p->rt.timeout++;
2293 p->rt.watchdog_stamp = jiffies;
2294 }
2295
2296 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
2297 if (p->rt.timeout > next) {
2298 posix_cputimers_rt_watchdog(&p->posix_cputimers,
2299 p->se.sum_exec_runtime);
2300 }
2301 }
2302 }
2303 #else
2304 static inline void watchdog(struct rq *rq, struct task_struct *p) { }
2305 #endif
2306
/*
 * Scheduler tick for an RT task: update runtime accounting and load
 * tracking, run the RLIMIT_RTTIME watchdog and, for SCHED_RR tasks
 * whose timeslice expired, refill the slice and requeue them at the
 * tail of their priority queue if they are not alone on it.
 */
2315 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
2316 {
2317 struct sched_rt_entity *rt_se = &p->rt;
2318
2319 update_curr_rt(rq);
2320 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
2321
2322 watchdog(rq, p);
2323
2324
2325
2326
2327
2328 if (p->policy != SCHED_RR)
2329 return;
2330
2331 if (--p->rt.time_slice)
2332 return;
2333
2334 p->rt.time_slice = sched_rr_timeslice;
2335
2336
2337
2338
2339
2340 for_each_sched_rt_entity(rt_se) {
2341 if (rt_se->run_list.prev != rt_se->run_list.next) {
2342 requeue_task_rt(rq, p, 0);
2343 resched_curr(rq);
2344 return;
2345 }
2346 }
2347 }
2348
2349 static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
2350 {
2351
2352
2353
2354 if (task->policy == SCHED_RR)
2355 return sched_rr_timeslice;
2356 else
2357 return 0;
2358 }
2359
2360 const struct sched_class rt_sched_class = {
2361 .next = &fair_sched_class,
2362 .enqueue_task = enqueue_task_rt,
2363 .dequeue_task = dequeue_task_rt,
2364 .yield_task = yield_task_rt,
2365
2366 .check_preempt_curr = check_preempt_curr_rt,
2367
2368 .pick_next_task = pick_next_task_rt,
2369 .put_prev_task = put_prev_task_rt,
2370 .set_next_task = set_next_task_rt,
2371
2372 #ifdef CONFIG_SMP
2373 .balance = balance_rt,
2374 .select_task_rq = select_task_rq_rt,
2375 .set_cpus_allowed = set_cpus_allowed_common,
2376 .rq_online = rq_online_rt,
2377 .rq_offline = rq_offline_rt,
2378 .task_woken = task_woken_rt,
2379 .switched_from = switched_from_rt,
2380 #endif
2381
2382 .task_tick = task_tick_rt,
2383
2384 .get_rr_interval = get_rr_interval_rt,
2385
2386 .prio_changed = prio_changed_rt,
2387 .switched_to = switched_to_rt,
2388
2389 .update_curr = update_curr_rt,
2390
2391 #ifdef CONFIG_UCLAMP_TASK
2392 .uclamp_enabled = 1,
2393 #endif
2394 };
2395
2396 #ifdef CONFIG_RT_GROUP_SCHED
2397
2398
2399
2400 static DEFINE_MUTEX(rt_constraints_mutex);
2401
2402
2403 static inline int tg_has_rt_tasks(struct task_group *tg)
2404 {
2405 struct task_struct *g, *p;
2406
2407
2408
2409
2410 if (task_group_is_autogroup(tg))
2411 return 0;
2412
2413 for_each_process_thread(g, p) {
2414 if (rt_task(p) && task_group(p) == tg)
2415 return 1;
2416 }
2417
2418 return 0;
2419 }
2420
2421 struct rt_schedulable_data {
2422 struct task_group *tg;
2423 u64 rt_period;
2424 u64 rt_runtime;
2425 };
2426
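/*
 * Validate a proposed (period, runtime) setting for @tg: runtime must
 * not exceed the period, a group with RT tasks cannot be left with
 * zero runtime while throttling is enabled, the group's share must fit
 * within the global limit, and the children's combined shares must not
 * exceed the parent's.
 */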
2427 static int tg_rt_schedulable(struct task_group *tg, void *data)
2428 {
2429 struct rt_schedulable_data *d = data;
2430 struct task_group *child;
2431 unsigned long total, sum = 0;
2432 u64 period, runtime;
2433
2434 period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2435 runtime = tg->rt_bandwidth.rt_runtime;
2436
2437 if (tg == d->tg) {
2438 period = d->rt_period;
2439 runtime = d->rt_runtime;
2440 }
2441
2442
2443
2444
2445 if (runtime > period && runtime != RUNTIME_INF)
2446 return -EINVAL;
2447
2448
2449
2450
2451 if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
2452 return -EBUSY;
2453
2454 total = to_ratio(period, runtime);
2455
2456
2457
2458
2459 if (total > to_ratio(global_rt_period(), global_rt_runtime()))
2460 return -EINVAL;
2461
2462
2463
2464
2465 list_for_each_entry_rcu(child, &tg->children, siblings) {
2466 period = ktime_to_ns(child->rt_bandwidth.rt_period);
2467 runtime = child->rt_bandwidth.rt_runtime;
2468
2469 if (child == d->tg) {
2470 period = d->rt_period;
2471 runtime = d->rt_runtime;
2472 }
2473
2474 sum += to_ratio(period, runtime);
2475 }
2476
2477 if (sum > total)
2478 return -EINVAL;
2479
2480 return 0;
2481 }
2482
2483 static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
2484 {
2485 int ret;
2486
2487 struct rt_schedulable_data data = {
2488 .tg = tg,
2489 .rt_period = period,
2490 .rt_runtime = runtime,
2491 };
2492
2493 rcu_read_lock();
2494 ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
2495 rcu_read_unlock();
2496
2497 return ret;
2498 }
2499
2500 static int tg_set_rt_bandwidth(struct task_group *tg,
2501 u64 rt_period, u64 rt_runtime)
2502 {
2503 int i, err = 0;
2504
2505
2506
2507
2508
2509 if (tg == &root_task_group && rt_runtime == 0)
2510 return -EINVAL;
2511
2512
2513 if (rt_period == 0)
2514 return -EINVAL;
2515
2516 mutex_lock(&rt_constraints_mutex);
2517 read_lock(&tasklist_lock);
2518 err = __rt_schedulable(tg, rt_period, rt_runtime);
2519 if (err)
2520 goto unlock;
2521
2522 raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2523 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
2524 tg->rt_bandwidth.rt_runtime = rt_runtime;
2525
2526 for_each_possible_cpu(i) {
2527 struct rt_rq *rt_rq = tg->rt_rq[i];
2528
2529 raw_spin_lock(&rt_rq->rt_runtime_lock);
2530 rt_rq->rt_runtime = rt_runtime;
2531 raw_spin_unlock(&rt_rq->rt_runtime_lock);
2532 }
2533 raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2534 unlock:
2535 read_unlock(&tasklist_lock);
2536 mutex_unlock(&rt_constraints_mutex);
2537
2538 return err;
2539 }
2540
2541 int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
2542 {
2543 u64 rt_runtime, rt_period;
2544
2545 rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2546 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
2547 if (rt_runtime_us < 0)
2548 rt_runtime = RUNTIME_INF;
2549 else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
2550 return -EINVAL;
2551
2552 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2553 }
2554
2555 long sched_group_rt_runtime(struct task_group *tg)
2556 {
2557 u64 rt_runtime_us;
2558
2559 if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
2560 return -1;
2561
2562 rt_runtime_us = tg->rt_bandwidth.rt_runtime;
2563 do_div(rt_runtime_us, NSEC_PER_USEC);
2564 return rt_runtime_us;
2565 }
2566
2567 int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
2568 {
2569 u64 rt_runtime, rt_period;
2570
2571 if (rt_period_us > U64_MAX / NSEC_PER_USEC)
2572 return -EINVAL;
2573
2574 rt_period = rt_period_us * NSEC_PER_USEC;
2575 rt_runtime = tg->rt_bandwidth.rt_runtime;
2576
2577 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2578 }
2579
2580 long sched_group_rt_period(struct task_group *tg)
2581 {
2582 u64 rt_period_us;
2583
2584 rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
2585 do_div(rt_period_us, NSEC_PER_USEC);
2586 return rt_period_us;
2587 }
2588
2589 static int sched_rt_global_constraints(void)
2590 {
2591 int ret = 0;
2592
2593 mutex_lock(&rt_constraints_mutex);
2594 read_lock(&tasklist_lock);
2595 ret = __rt_schedulable(NULL, 0, 0);
2596 read_unlock(&tasklist_lock);
2597 mutex_unlock(&rt_constraints_mutex);
2598
2599 return ret;
2600 }
2601
2602 int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
2603 {
2604
2605 if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
2606 return 0;
2607
2608 return 1;
2609 }
2610
2611 #else
2612 static int sched_rt_global_constraints(void)
2613 {
2614 unsigned long flags;
2615 int i;
2616
2617 raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
2618 for_each_possible_cpu(i) {
2619 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
2620
2621 raw_spin_lock(&rt_rq->rt_runtime_lock);
2622 rt_rq->rt_runtime = global_rt_runtime();
2623 raw_spin_unlock(&rt_rq->rt_runtime_lock);
2624 }
2625 raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
2626
2627 return 0;
2628 }
2629 #endif
2630
2631 static int sched_rt_global_validate(void)
2632 {
2633 if (sysctl_sched_rt_period <= 0)
2634 return -EINVAL;
2635
2636 if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
2637 (sysctl_sched_rt_runtime > sysctl_sched_rt_period))
2638 return -EINVAL;
2639
2640 return 0;
2641 }
2642
2643 static void sched_rt_do_global(void)
2644 {
2645 def_rt_bandwidth.rt_runtime = global_rt_runtime();
2646 def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
2647 }
2648
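/*
 * sysctl handler for sched_rt_period_us / sched_rt_runtime_us: accept
 * the new values only if they pass RT and deadline-bandwidth
 * validation, otherwise roll back to the old settings.
 */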
2649 int sched_rt_handler(struct ctl_table *table, int write,
2650 void __user *buffer, size_t *lenp,
2651 loff_t *ppos)
2652 {
2653 int old_period, old_runtime;
2654 static DEFINE_MUTEX(mutex);
2655 int ret;
2656
2657 mutex_lock(&mutex);
2658 old_period = sysctl_sched_rt_period;
2659 old_runtime = sysctl_sched_rt_runtime;
2660
2661 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2662
2663 if (!ret && write) {
2664 ret = sched_rt_global_validate();
2665 if (ret)
2666 goto undo;
2667
2668 ret = sched_dl_global_validate();
2669 if (ret)
2670 goto undo;
2671
2672 ret = sched_rt_global_constraints();
2673 if (ret)
2674 goto undo;
2675
2676 sched_rt_do_global();
2677 sched_dl_do_global();
2678 }
2679 if (0) {
2680 undo:
2681 sysctl_sched_rt_period = old_period;
2682 sysctl_sched_rt_runtime = old_runtime;
2683 }
2684 mutex_unlock(&mutex);
2685
2686 return ret;
2687 }
2688
2689 int sched_rr_handler(struct ctl_table *table, int write,
2690 void __user *buffer, size_t *lenp,
2691 loff_t *ppos)
2692 {
2693 int ret;
2694 static DEFINE_MUTEX(mutex);
2695
2696 mutex_lock(&mutex);
2697 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2698
2699
2700
2701
2702 if (!ret && write) {
2703 sched_rr_timeslice =
2704 sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
2705 msecs_to_jiffies(sysctl_sched_rr_timeslice);
2706 }
2707 mutex_unlock(&mutex);
2708
2709 return ret;
2710 }
2711
2712 #ifdef CONFIG_SCHED_DEBUG
2713 void print_rt_stats(struct seq_file *m, int cpu)
2714 {
2715 rt_rq_iter_t iter;
2716 struct rt_rq *rt_rq;
2717
2718 rcu_read_lock();
2719 for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
2720 print_rt_rq(m, cpu, rt_rq);
2721 rcu_read_unlock();
2722 }
2723 #endif