How to add a new process scheduling algorithm to kernel 2.4.20-8

Posted on 2003-4-24 22:27:37
Kernel 2.4.20-8 is the kernel shipped with Red Hat 9.0.

I would like to add a new process scheduling policy to this kernel (in addition to the existing SCHED_OTHER, SCHED_FIFO and SCHED_RR), but I don't know how to go about it. The material I have found is rather dated and says nothing about the 2.4.20-8 kernel, and the documentation Red Hat ships is far too sparse.

Any pointers from the experts here would be greatly appreciated.
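
To make the goal concrete: once the kernel knows about the new policy, it would presumably be selected from user space through the standard sched_setscheduler() call. Below is a minimal sketch, where SCHED_MYPOLICY is a made-up placeholder for the new policy's constant (it does not exist in the stock kernel):

#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical constant for the new policy; the value must match whatever
 * the kernel patch defines alongside SCHED_OTHER/SCHED_FIFO/SCHED_RR. */
#define SCHED_MYPOLICY 3

int main(void)
{
        struct sched_param param;

        param.sched_priority = 0;  /* priority semantics depend on the new policy */
        if (sched_setscheduler(0, SCHED_MYPOLICY, &param) == -1) {
                perror("sched_setscheduler");
                exit(1);
        }
        printf("policy of this process is now: %d\n", sched_getscheduler(0));
        return 0;
}

On an unpatched kernel the call simply fails with EINVAL, so the same program doubles as a quick smoke test once the new policy is in place.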
OP | Posted on 2003-4-24 23:07:59
A correction:

In 2.4.20-8 the policy is called SCHED_NORMAL, not SCHED_OTHER.
OP | Posted on 2003-4-25 10:11:37
up
Posted on 2003-5-4 17:05:35
I have been working on this recently too. My initial plan is a multi-level priority queue, to reduce the time spent traversing the whole run_task list during scheduling.
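
One common arrangement for such a multi-level queue is an array of per-priority lists plus a bitmap of the non-empty levels, so that picking the next task becomes one find-first-set-bit plus one list-head lookup instead of a walk over every task. A rough user-space sketch (the names multiqueue, mytask and the helpers are made up for illustration, not kernel code):

#include <stdio.h>

#define NR_LEVELS 32                      /* number of priority levels, 0 = highest */

struct mytask {
        int prio;                         /* priority level of this task */
        struct mytask *next;              /* per-level singly linked list */
};

struct multiqueue {
        unsigned long bitmap;             /* bit i set <=> level i is non-empty */
        struct mytask *level[NR_LEVELS];
};

static void mq_enqueue(struct multiqueue *q, struct mytask *t)
{
        t->next = q->level[t->prio];
        q->level[t->prio] = t;
        q->bitmap |= 1UL << t->prio;
}

/* Pick the highest-priority runnable task without scanning the whole list. */
static struct mytask *mq_pick(struct multiqueue *q)
{
        int lvl;
        struct mytask *t;

        if (!q->bitmap)
                return NULL;              /* nothing runnable */
        lvl = __builtin_ctzl(q->bitmap);  /* index of the lowest set bit */
        t = q->level[lvl];
        q->level[lvl] = t->next;
        if (!q->level[lvl])
                q->bitmap &= ~(1UL << lvl);
        return t;
}

int main(void)
{
        struct multiqueue q = { 0, { NULL } };
        struct mytask a = { 3, NULL }, b = { 1, NULL };

        mq_enqueue(&q, &a);
        mq_enqueue(&q, &b);
        printf("picked task at level %d\n", mq_pick(&q)->prio);  /* level 1 beats 3 */
        return 0;
}

This is essentially the shape of the O(1) scheduler's prio_array, which Red Hat's 2.4.20-8 appears to carry as a backport (hence the SCHED_NORMAL name mentioned above), so that code is worth reading for the real thing.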
Posted on 2003-5-9 03:53:42

Reply:

I thought about something similar before, but in the end it went nowhere. If you look through the Linux process-scheduling code you will see that the core of its scheduling algorithm is very clearly laid out, and I suspect that bolting more pieces onto it would be gilding the lily. There are of course ways to add a new policy, but kernel versions now differ quite a bit, so the approaches need to be sorted out per version. In any case, post your algorithm and let's study it together!
Posted on 2003-5-9 23:31:22
Has anyone read the fair share scheduler patch for 2.3.99-pre3 written by Rik van Riel?
It is quite short, but I haven't really understood it. Could someone help analyze it:
--- linux-2.3.99-pre3/kernel/sched.c.orig        Mon Mar 27 14:05:25 2000
+++ linux-2.3.99-pre3/kernel/sched.c        Wed Mar 29 18:37:21 2000
@@ -41,6 +41,10 @@

extern void mem_use(void);

+#ifdef CONFIG_FAIRSCHED
+int fairsched = 0; /* toggle fair scheduler on/off */
+#endif /* CONFIG_FAIRSCHED */
+
/*
  *        Init task must be ok at boot for the ix86 as we will check its signals
  *        via the SMP irq return path.
@@ -61,6 +65,7 @@
  */
spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED;  /* second */
rwlock_t tasklist_lock = RW_LOCK_UNLOCKED;        /* third */
+spinlock_t recalc_lock = SPIN_LOCK_UNLOCKED;

static LIST_HEAD(runqueue_head);

@@ -600,10 +605,61 @@
        {
                struct task_struct *p;
                spin_unlock_irq(&runqueue_lock);
-                read_lock(&tasklist_lock);
-                for_each_task(p)
-                        p->counter = (p->counter >> 1) + p->priority;
-                read_unlock(&tasklist_lock);
+#ifdef CONFIG_FAIRSCHED
+        /*
+         * Simple, low-overhead fair share scheduler. It works by handing
+         * out CPU time like we do at the normal recalculation.
+         * The catch is that we move the list head (where the for_each_task()
+         * loop starts) to _after_ the first task where we ran out of quota.
+         * This means that if a user has too many runnable processes, his
+         * tasks will get extra CPU time here in turns. -- Rik
+         */
+                if (fairsched) {
+                        struct task_struct *ran_out = NULL;
+                        struct user_struct *up;
+                        long oldcounter;
+                        // if (!spin_trylock(&recalc_lock)) {
+                        //         spin_lock_irq(&runqueue_lock);
+                        //         goto repeat_schedule;
+                        // }
+                        write_lock(&tasklist_lock);
+                        for_each_task(p) {
+                                up = p->user;
+                                if (!up) {
+                                        p->counter = (p->counter >> 1) + p->priority;
+                                        continue;
+                                }
+                                if (!up->cpu_ticks)
+                                        continue;
+                                oldcounter = p->counter;
+                                p->counter = (p->counter >> 1) + p->priority;
+                                up->cpu_ticks += oldcounter;
+                                up->cpu_ticks -= p->counter;
+                                if (up->cpu_ticks <= 0 && ran_out == NULL) {
+                                        up->cpu_ticks = 0;
+                                        ran_out = p;
+                                }
+                        }
+                        if (ran_out) {
+                                move_tq_head(ran_out);
+                        }
+                        write_unlock(&tasklist_lock);
+
+                        read_lock(&uidhash_lock);
+                        for_each_user_struct(up) {
+                                /* replace DEF_PRIORITY with user quota */
+                                up->cpu_ticks = (up->cpu_ticks >> 1) + DEF_PRIORITY;
+                        }
+                        read_unlock(&uidhash_lock);
+                        // spin_unlock(&recalc_lock);
+                } else
+#endif /* CONFIG_FAIRSCHED */
+                {
+                        read_lock(&tasklist_lock);
+                        for_each_task(p)
+                                p->counter = (p->counter >> 1) + p->priority;
+                        read_unlock(&tasklist_lock);
+                }
                spin_lock_irq(&runqueue_lock);
        }
        goto repeat_schedule;
--- linux-2.3.99-pre3/kernel/fork.c.orig        Mon Mar 27 16:05:45 2000
+++ linux-2.3.99-pre3/kernel/fork.c        Wed Mar 29 11:24:53 2000
@@ -17,6 +17,7 @@
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
+#include <linux/sched.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -44,13 +45,16 @@
  */
#define UIDHASH_SZ        (PIDHASH_SZ >> 2)

-static struct user_struct {
-        atomic_t count;
-        struct user_struct *next, **pprev;
-        unsigned int uid;
-} *uidhash[UIDHASH_SZ];
+struct user_struct *uidhash[UIDHASH_SZ];
+struct user_struct uidhead = {
+        ATOMIC_INIT(0),
+        &uidhead, (struct user_struct **) &uidhead,
+        &uidhead, &uidhead,
+        0,
+        DEF_PRIORITY,
+};

-spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;
+rwlock_t uidhash_lock = RW_LOCK_UNLOCKED;

kmem_cache_t *uid_cachep;

@@ -65,6 +69,10 @@
                uidhash[hashent]->pprev = &up->next;
        up->pprev = &uidhash[hashent];
        uidhash[hashent] = up;
+        up->l_next = uidhead.l_next;
+        up->l_prev = &uidhead;
+        uidhead.l_next->l_prev = up;
+        uidhead.l_next = up;
}

static inline void uid_hash_remove(struct user_struct *up)
@@ -72,6 +80,8 @@
        if(up->next)
                up->next->pprev = up->pprev;
        *up->pprev = up->next;
+        up->l_prev->l_next = up->l_next;
+        up->l_next->l_prev = up->l_prev;
}

static inline struct user_struct *uid_hash_find(unsigned short uid, unsigned int hashent)
@@ -111,12 +121,12 @@
        if (up) {
                p->user = NULL;
                if (atomic_dec_and_test(&up->count)) {
-                        spin_lock(&uidhash_lock);
+                        write_lock(&uidhash_lock);
                        if (uid_hash_free(up)) {
                                uid_hash_remove(up);
                                kmem_cache_free(uid_cachep, up);
                        }
-                        spin_unlock(&uidhash_lock);
+                        write_unlock(&uidhash_lock);
                }
        }
}
@@ -126,9 +136,9 @@
        unsigned int hashent = uidhashfn(p->uid);
        struct user_struct *up;

-        spin_lock(&uidhash_lock);
+        read_lock(&uidhash_lock);
        up = uid_hash_find(p->uid, hashent);
-        spin_unlock(&uidhash_lock);
+        read_unlock(&uidhash_lock);

        if (!up) {
                struct user_struct *new;
@@ -138,12 +148,13 @@
                        return -EAGAIN;
                new->uid = p->uid;
                atomic_set(&new->count, 1);
+                new->cpu_ticks = DEF_PRIORITY;

                /*
                 * Before adding this, check whether we raced
                 * on adding the same user already..
                 */
-                spin_lock(&uidhash_lock);
+                write_lock(&uidhash_lock);
                up = uid_hash_find(p->uid, hashent);
                if (up) {
                        kmem_cache_free(uid_cachep, new);
@@ -151,7 +162,7 @@
                        uid_hash_insert(new, hashent);
                        up = new;
                }
-                spin_unlock(&uidhash_lock);
+                write_unlock(&uidhash_lock);

        }
        p->user = up;
--- linux-2.3.99-pre3/kernel/sysctl.c.orig        Mon Mar 27 16:56:50 2000
+++ linux-2.3.99-pre3/kernel/sysctl.c        Mon Mar 27 17:03:54 2000
@@ -46,6 +46,7 @@
extern int sysctl_overcommit_memory;
extern int max_threads;
extern int nr_queued_signals, max_queued_signals;
+extern int fairsched;

/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
@@ -222,6 +223,10 @@
        {KERN_OVERFLOWGID, "overflowgid", &overflowgid, sizeof(int), 0644, NULL,
         &proc_dointvec_minmax, &sysctl_intvec, NULL,
         &minolduid, &maxolduid},
+#ifdef CONFIG_FAIRSCHED
+        {KERN_FAIRSCHED, "fairsched", &fairsched, sizeof(int),
+         0644, NULL, &proc_dointvec},
+#endif
        {0}
};

--- linux-2.3.99-pre3/include/linux/sched.h.orig        Mon Mar 27 14:07:14 2000
+++ linux-2.3.99-pre3/include/linux/sched.h        Tue Mar 28 11:47:12 2000
@@ -255,7 +255,13 @@
  * Right now it is only used to track how many processes a
  * user has, but it has the potential to track memory usage etc.
  */
-struct user_struct;
+struct user_struct {
+        atomic_t count;
+        struct user_struct *next, **pprev;
+        struct user_struct *l_next, *l_prev;
+        unsigned int uid;
+        long cpu_ticks;
+};

struct task_struct {
/* these are hardcoded - don't touch */
@@ -448,6 +454,8 @@
/* PID hashing. (shouldnt this be dynamic?) */
#define PIDHASH_SZ (4096 >> 2)
extern struct task_struct *pidhash[PIDHASH_SZ];
+extern struct user_struct uidhead;
+extern rwlock_t uidhash_lock;

#define pid_hashfn(x)        ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))

@@ -822,6 +830,18 @@
#define for_each_task(p) \
        for (p = &init_task ; (p = p->next_task) != &init_task ; )

+#define for_each_user_struct(up) \
+        for (up = &uidhead ; (up = up->l_next) != &uidhead ; )
+
+static inline void move_tq_head(struct task_struct * ran_out)
+{
+        init_task.prev_task->next_task = init_task.next_task;
+        init_task.next_task->prev_task = init_task.prev_task;
+        init_task.next_task = (ran_out)->next_task;
+        init_task.prev_task = (ran_out);
+        (ran_out)->next_task->prev_task = &init_task;
+        (ran_out)->next_task = &init_task;
+}

static inline void del_from_runqueue(struct task_struct * p)
{
--- linux-2.3.99-pre3/include/linux/sysctl.h.orig        Mon Mar 27 16:55:49 2000
+++ linux-2.3.99-pre3/include/linux/sysctl.h        Mon Mar 27 16:56:41 2000
@@ -112,6 +112,7 @@
        KERN_OVERFLOWUID=46,        /* int: overflow UID */
        KERN_OVERFLOWGID=47,        /* int: overflow GID */
        KERN_SHMPATH=48,        /* string: path to shm fs */
+        KERN_FAIRSCHED=49,        /* turn fair scheduler on/off */
};


--- linux-2.3.99-pre3/arch/i386/config.in.orig        Mon Mar 27 17:04:26 2000
+++ linux-2.3.99-pre3/arch/i386/config.in        Mon Mar 27 17:06:56 2000
@@ -138,6 +138,9 @@
    source drivers/pcmcia/Config.in
fi

+if [ "$CONFIG_EXPERIMENTAL" = "y" ] ; then
+   bool 'Fair scheduler' CONFIG_FAIRSCHED
+fi
bool 'System V IPC' CONFIG_SYSVIPC
bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
bool 'Sysctl support' CONFIG_SYSCTL
Posted on 2003-5-10 11:39:43

My understanding

The key lines are the following:

+                                if (!up->cpu_ticks)
+                                        continue;
+                                oldcounter = p->counter;
+                                p->counter = (p->counter >> 1) + p->priority;
+                                up->cpu_ticks += oldcounter;
+                                up->cpu_ticks -= p->counter;
+                                if (up->cpu_ticks <= 0 && ran_out == NULL) {
+                                        up->cpu_ticks = 0;
+                                        ran_out = p;

1. The fair scheduler essentially turns per-process scheduling into per-user scheduling.
2. The newly added code runs when every runnable process has exhausted its time slice, i.e. at the counter recalculation.
3. How it works:
       a. When the new code runs, that is, when run time is being handed out to the processes again:
          if (!up->cpu_ticks) // the user's quota is used up, so none of that user's processes get new time; they wait;
          otherwise,
          oldcounter = p->counter;
          p->counter = (p->counter >> 1) + p->priority; // hand out each process's time just as the original Linux recalculation does; at the same time:
          up->cpu_ticks += oldcounter; // credit back time left over from sleeping processes so it is not lost
          up->cpu_ticks -= p->counter; // subtract the time just handed out
       b. Processes then run and switch under the original scheduling scheme until the condition for the new code is met again.


Here up->cpu_ticks acts like a bookkeeper: every user starts with the same quota, which is what makes the scheduling per-user, i.e. the processes owned by each user get the same total run time.
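
To watch the arithmetic in isolation, here is a small user-space simulation of that recalculation pass. sim_user, sim_task and recalc are made-up stand-ins for illustration; the move_tq_head() rotation and the per-user cpu_ticks refresh that follow in the real patch are left out:

#include <stdio.h>

#define DEF_PRIORITY 20

struct sim_user {
        long cpu_ticks;                         /* per-user quota, as in the patch */
};

struct sim_task {
        int counter;                            /* remaining time slice */
        int priority;                           /* static priority */
        struct sim_user *user;
};

/* One recalculation pass over all tasks: hand out fresh counters the usual
 * way, but charge them against the owner's cpu_ticks and stop once a user's
 * quota is gone. */
static void recalc(struct sim_task *tasks, int n)
{
        struct sim_task *ran_out = NULL;
        int i;

        for (i = 0; i < n; i++) {
                struct sim_task *p = &tasks[i];
                struct sim_user *up = p->user;
                long oldcounter;

                if (!up->cpu_ticks)
                        continue;               /* user out of quota: no new slice */
                oldcounter = p->counter;
                p->counter = (p->counter >> 1) + p->priority;
                up->cpu_ticks += oldcounter;    /* credit back unused slice time */
                up->cpu_ticks -= p->counter;    /* charge the slice just handed out */
                if (up->cpu_ticks <= 0 && ran_out == NULL) {
                        up->cpu_ticks = 0;
                        ran_out = p;            /* real patch rotates the task list here */
                }
        }
        (void)ran_out;                          /* move_tq_head(ran_out) omitted */
}

int main(void)
{
        struct sim_user heavy = { DEF_PRIORITY }, light = { DEF_PRIORITY };
        struct sim_task tasks[] = {
                { 0, DEF_PRIORITY, &heavy },    /* three tasks owned by "heavy" */
                { 0, DEF_PRIORITY, &heavy },
                { 0, DEF_PRIORITY, &heavy },
                { 0, DEF_PRIORITY, &light },    /* one task owned by "light" */
        };
        int i;

        recalc(tasks, 4);
        for (i = 0; i < 4; i++)
                printf("task %d: counter=%d\n", i, tasks[i].counter);
        return 0;
}

With the quotas above, only the first of heavy's three tasks gets a fresh counter in this pass, while light's single task gets its full share: exactly the per-user throttling described in point 3a.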



The above is only a rough description; to see what the specific code is really doing, it helps to walk through various scenarios.


It took me a good three days to understand these few lines. I was reading 《边干边学》, edited by 李善平, which explains this fairly clearly.
Posted on 2007-4-29 08:44:27
Does anyone have an electronic copy of 李善平's 《边干边学》, or any other material on this algorithm? Could you please send it to me? Thanks!
howema@126.com
Posted on 2007-4-30 21:07:37

OP, how did your work turn out? I'd like to do this too; could we compare notes?