How to add a new process scheduling algorithm to kernel 2.4.20-8

Posted on 2003-4-24 22:27:37
Kernel 2.4.20-8 is the kernel shipped with Red Hat 9.0.

I would like to add a new process scheduling policy to this kernel (in addition to the existing SCHED_OTHER, SCHED_FIFO and SCHED_RR), but I don't know how to go about it. The material I have found is rather dated and says nothing about the 2.4.20-8 kernel, and the documentation Red Hat ships is far too sparse.

Any pointers from the experts here would be greatly appreciated.
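
To make the goal concrete: once the kernel knows about the new policy, it would presumably be selected from user space through the standard sched_setscheduler() call. Below is a minimal sketch, where SCHED_MYPOLICY is a made-up placeholder for the new policy's constant (it does not exist in the stock kernel):

#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical constant for the new policy; the value must match whatever
 * the kernel patch defines alongside SCHED_OTHER/SCHED_FIFO/SCHED_RR. */
#define SCHED_MYPOLICY 3

int main(void)
{
        struct sched_param param;

        param.sched_priority = 0;  /* priority semantics depend on the new policy */
        if (sched_setscheduler(0, SCHED_MYPOLICY, &param) == -1) {
                perror("sched_setscheduler");
                exit(1);
        }
        printf("policy of this process is now: %d\n", sched_getscheduler(0));
        return 0;
}

On an unpatched kernel the call simply fails with EINVAL, so the same program doubles as a quick smoke test once the new policy is in place.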
OP | Posted on 2003-4-24 23:07:59
A correction:

In 2.4.20-8 the policy is called SCHED_NORMAL, not SCHED_OTHER.
OP | Posted on 2003-4-25 10:11:37
up
Posted on 2003-5-4 17:05:35
I have been working on this recently too. My initial plan is a multi-level priority queue, to reduce the time spent traversing the whole run_task list during scheduling.
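
One common arrangement for such a multi-level queue is an array of per-priority lists plus a bitmap of the non-empty levels, so that picking the next task becomes one find-first-set-bit plus one list-head lookup instead of a walk over every task. A rough user-space sketch (the names multiqueue, mytask and the helpers are made up for illustration, not kernel code):

#include <stdio.h>

#define NR_LEVELS 32                      /* number of priority levels, 0 = highest */

struct mytask {
        int prio;                         /* priority level of this task */
        struct mytask *next;              /* per-level singly linked list */
};

struct multiqueue {
        unsigned long bitmap;             /* bit i set <=> level i is non-empty */
        struct mytask *level[NR_LEVELS];
};

static void mq_enqueue(struct multiqueue *q, struct mytask *t)
{
        t->next = q->level[t->prio];
        q->level[t->prio] = t;
        q->bitmap |= 1UL << t->prio;
}

/* Pick the highest-priority runnable task without scanning the whole list. */
static struct mytask *mq_pick(struct multiqueue *q)
{
        int lvl;
        struct mytask *t;

        if (!q->bitmap)
                return NULL;              /* nothing runnable */
        lvl = __builtin_ctzl(q->bitmap);  /* index of the lowest set bit */
        t = q->level[lvl];
        q->level[lvl] = t->next;
        if (!q->level[lvl])
                q->bitmap &= ~(1UL << lvl);
        return t;
}

int main(void)
{
        struct multiqueue q = { 0, { NULL } };
        struct mytask a = { 3, NULL }, b = { 1, NULL };

        mq_enqueue(&q, &a);
        mq_enqueue(&q, &b);
        printf("picked task at level %d\n", mq_pick(&q)->prio);  /* level 1 beats 3 */
        return 0;
}

This is essentially the shape of the O(1) scheduler's prio_array, which Red Hat's 2.4.20-8 appears to carry as a backport (hence the SCHED_NORMAL name mentioned above), so that code is worth reading for the real thing.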
Posted on 2003-5-9 03:53:42

Reply:

I thought about something similar before, but in the end it went nowhere. If you look through the Linux process-scheduling code you will see that the core of its scheduling algorithm is very clearly laid out, and I suspect that bolting more pieces onto it would be gilding the lily. There are of course ways to add a new policy, but kernel versions now differ quite a bit, so the approaches need to be sorted out per version. In any case, post your algorithm and let's study it together!
Posted on 2003-5-9 23:31:22
Has anyone read the fair share scheduler patch for 2.3.99-pre3 written by Rik van Riel?
It is quite short, but I haven't really understood it. Could someone help analyze it:
--- linux-2.3.99-pre3/kernel/sched.c.orig        Mon Mar 27 14:05:25 2000
+++ linux-2.3.99-pre3/kernel/sched.c        Wed Mar 29 18:37:21 2000
@@ -41,6 +41,10 @@

extern void mem_use(void);

+#ifdef CONFIG_FAIRSCHED
+int fairsched = 0; /* toggle fair scheduler on/off */
+#endif /* CONFIG_FAIRSCHED */
+
/*
  *        Init task must be ok at boot for the ix86 as we will check its signals
  *        via the SMP irq return path.
@@ -61,6 +65,7 @@
  */
spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED;  /* second */
rwlock_t tasklist_lock = RW_LOCK_UNLOCKED;        /* third */
+spinlock_t recalc_lock = SPIN_LOCK_UNLOCKED;

static LIST_HEAD(runqueue_head);

@@ -600,10 +605,61 @@
        {
                struct task_struct *p;
                spin_unlock_irq(&runqueue_lock);
-                read_lock(&tasklist_lock);
-                for_each_task(p)
-                        p->counter = (p->counter >> 1) + p->priority;
-                read_unlock(&tasklist_lock);
+#ifdef CONFIG_FAIRSCHED
+        /*
+         * Simple, low-overhead fair share scheduler. It works by handing
+         * out CPU time like we do at the normal recalculation.
+         * The catch is that we move the list head (where the for_each_task()
+         * loop starts) to _after_ the first task where we ran out of quota.
+         * This means that if a user has too many runnable processes, his
+         * tasks will get extra CPU time here in turns. -- Rik
+         */
+                if (fairsched) {
+                        struct task_struct *ran_out = NULL;
+                        struct user_struct *up;
+                        long oldcounter;
+                        // if (!spin_trylock(&recalc_lock)) {
+                        //         spin_lock_irq(&runqueue_lock);
+                        //         goto repeat_schedule;
+                        // }
+                        write_lock(&tasklist_lock);
+                        for_each_task(p) {
+                                up = p->user;
+                                if (!up) {
+                                        p->counter = (p->counter >> 1) + p->priority;
+                                        continue;
+                                }
+                                if (!up->cpu_ticks)
+                                        continue;
+                                oldcounter = p->counter;
+                                p->counter = (p->counter >> 1) + p->priority;
+                                up->cpu_ticks += oldcounter;
+                                up->cpu_ticks -= p->counter;
+                                if (up->cpu_ticks <= 0 && ran_out == NULL) {
+                                        up->cpu_ticks = 0;
+                                        ran_out = p;
+                                }
+                        }
+                        if (ran_out) {
+                                move_tq_head(ran_out);
+                        }
+                        write_unlock(&tasklist_lock);
+
+                        read_lock(&uidhash_lock);
+                        for_each_user_struct(up) {
+                                /* replace DEF_PRIORITY with user quota */
+                                up->cpu_ticks = (up->cpu_ticks >> 1) + DEF_PRIORITY;
+                        }
+                        read_unlock(&uidhash_lock);
+                        // spin_unlock(&recalc_lock);
+                } else
+#endif /* CONFIG_FAIRSCHED */
+                {
+                        read_lock(&tasklist_lock);
+                        for_each_task(p)
+                                p->counter = (p->counter >> 1) + p->priority;
+                        read_unlock(&tasklist_lock);
+                }
                spin_lock_irq(&runqueue_lock);
        }
        goto repeat_schedule;
--- linux-2.3.99-pre3/kernel/fork.c.orig        Mon Mar 27 16:05:45 2000
+++ linux-2.3.99-pre3/kernel/fork.c        Wed Mar 29 11:24:53 2000
@@ -17,6 +17,7 @@
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
+#include <linux/sched.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -44,13 +45,16 @@
  */
#define UIDHASH_SZ        (PIDHASH_SZ >> 2)

-static struct user_struct {
-        atomic_t count;
-        struct user_struct *next, **pprev;
-        unsigned int uid;
-} *uidhash[UIDHASH_SZ];
+struct user_struct *uidhash[UIDHASH_SZ];
+struct user_struct uidhead = {
+        ATOMIC_INIT(0),
+        &uidhead, (struct user_struct **) &uidhead,
+        &uidhead, &uidhead,
+        0,
+        DEF_PRIORITY,
+};

-spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;
+rwlock_t uidhash_lock = RW_LOCK_UNLOCKED;

kmem_cache_t *uid_cachep;

@@ -65,6 +69,10 @@
                uidhash[hashent]->pprev = &up->next;
        up->pprev = &uidhash[hashent];
        uidhash[hashent] = up;
+        up->l_next = uidhead.l_next;
+        up->l_prev = &uidhead;
+        uidhead.l_next->l_prev = up;
+        uidhead.l_next = up;
}

static inline void uid_hash_remove(struct user_struct *up)
@@ -72,6 +80,8 @@
        if(up->next)
                up->next->pprev = up->pprev;
        *up->pprev = up->next;
+        up->l_prev->l_next = up->l_next;
+        up->l_next->l_prev = up->l_prev;
}

static inline struct user_struct *uid_hash_find(unsigned short uid, unsigned int hashent)
@@ -111,12 +121,12 @@
        if (up) {
                p->user = NULL;
                if (atomic_dec_and_test(&up->count)) {
-                        spin_lock(&uidhash_lock);
+                        write_lock(&uidhash_lock);
                        if (uid_hash_free(up)) {
                                uid_hash_remove(up);
                                kmem_cache_free(uid_cachep, up);
                        }
-                        spin_unlock(&uidhash_lock);
+                        write_unlock(&uidhash_lock);
                }
        }
}
@@ -126,9 +136,9 @@
        unsigned int hashent = uidhashfn(p->uid);
        struct user_struct *up;

-        spin_lock(&uidhash_lock);
+        read_lock(&uidhash_lock);
        up = uid_hash_find(p->uid, hashent);
-        spin_unlock(&uidhash_lock);
+        read_unlock(&uidhash_lock);

        if (!up) {
                struct user_struct *new;
@@ -138,12 +148,13 @@
                        return -EAGAIN;
                new->uid = p->uid;
                atomic_set(&new->count, 1);
+                new->cpu_ticks = DEF_PRIORITY;

                /*
                 * Before adding this, check whether we raced
                 * on adding the same user already..
                 */
-                spin_lock(&uidhash_lock);
+                write_lock(&uidhash_lock);
                up = uid_hash_find(p->uid, hashent);
                if (up) {
                        kmem_cache_free(uid_cachep, new);
@@ -151,7 +162,7 @@
                        uid_hash_insert(new, hashent);
                        up = new;
                }
-                spin_unlock(&uidhash_lock);
+                write_unlock(&uidhash_lock);

        }
        p->user = up;
--- linux-2.3.99-pre3/kernel/sysctl.c.orig        Mon Mar 27 16:56:50 2000
+++ linux-2.3.99-pre3/kernel/sysctl.c        Mon Mar 27 17:03:54 2000
@@ -46,6 +46,7 @@
extern int sysctl_overcommit_memory;
extern int max_threads;
extern int nr_queued_signals, max_queued_signals;
+extern int fairsched;

/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
@@ -222,6 +223,10 @@
        {KERN_OVERFLOWGID, "overflowgid", &overflowgid, sizeof(int), 0644, NULL,
         &proc_dointvec_minmax, &sysctl_intvec, NULL,
         &minolduid, &maxolduid},
+#ifdef CONFIG_FAIRSCHED
+        {KERN_FAIRSCHED, "fairsched", &fairsched, sizeof(int),
+         0644, NULL, &proc_dointvec},
+#endif
        {0}
};

--- linux-2.3.99-pre3/include/linux/sched.h.orig        Mon Mar 27 14:07:14 2000
+++ linux-2.3.99-pre3/include/linux/sched.h        Tue Mar 28 11:47:12 2000
@@ -255,7 +255,13 @@
  * Right now it is only used to track how many processes a
  * user has, but it has the potential to track memory usage etc.
  */
-struct user_struct;
+struct user_struct {
+        atomic_t count;
+        struct user_struct *next, **pprev;
+        struct user_struct *l_next, *l_prev;
+        unsigned int uid;
+        long cpu_ticks;
+};

struct task_struct {
/* these are hardcoded - don't touch */
@@ -448,6 +454,8 @@
/* PID hashing. (shouldnt this be dynamic?) */
#define PIDHASH_SZ (4096 >> 2)
extern struct task_struct *pidhash[PIDHASH_SZ];
+extern struct user_struct uidhead;
+extern rwlock_t uidhash_lock;

#define pid_hashfn(x)        ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))

@@ -822,6 +830,18 @@
#define for_each_task(p) \
        for (p = &init_task ; (p = p->next_task) != &init_task ; )

+#define for_each_user_struct(up) \
+        for (up = &uidhead ; (up = up->l_next) != &uidhead ; )
+
+static inline void move_tq_head(struct task_struct * ran_out)
+{
+        init_task.prev_task->next_task = init_task.next_task;
+        init_task.next_task->prev_task = init_task.prev_task;
+        init_task.next_task = (ran_out)->next_task;
+        init_task.prev_task = (ran_out);
+        (ran_out)->next_task->prev_task = &init_task;
+        (ran_out)->next_task = &init_task;
+}

static inline void del_from_runqueue(struct task_struct * p)
{
--- linux-2.3.99-pre3/include/linux/sysctl.h.orig        Mon Mar 27 16:55:49 2000
+++ linux-2.3.99-pre3/include/linux/sysctl.h        Mon Mar 27 16:56:41 2000
@@ -112,6 +112,7 @@
        KERN_OVERFLOWUID=46,        /* int: overflow UID */
        KERN_OVERFLOWGID=47,        /* int: overflow GID */
        KERN_SHMPATH=48,        /* string: path to shm fs */
+        KERN_FAIRSCHED=49,        /* turn fair scheduler on/off */
};


--- linux-2.3.99-pre3/arch/i386/config.in.orig        Mon Mar 27 17:04:26 2000
+++ linux-2.3.99-pre3/arch/i386/config.in        Mon Mar 27 17:06:56 2000
@@ -138,6 +138,9 @@
    source drivers/pcmcia/Config.in
fi

+if [ "$CONFIG_EXPERIMENTAL" = "y" ] ; then
+   bool 'Fair scheduler' CONFIG_FAIRSCHED
+fi
bool 'System V IPC' CONFIG_SYSVIPC
bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
bool 'Sysctl support' CONFIG_SYSCTL
Posted on 2003-5-10 11:39:43

My understanding

The key lines are the following:

+                                if (!up->cpu_ticks)
+                                        continue;
+                                oldcounter = p->counter;
+                                p->counter = (p->counter >> 1) + p->priority;
+                                up->cpu_ticks += oldcounter;
+                                up->cpu_ticks -= p->counter;
+                                if (up->cpu_ticks <= 0 && ran_out == NULL) {
+                                        up->cpu_ticks = 0;
+                                        ran_out = p;

1. The fair scheduler essentially turns per-process scheduling into per-user scheduling.
2. The newly added code runs when every runnable process has exhausted its time slice, i.e. at the counter recalculation.
3. How it works:
       a. When the new code runs, that is, when run time is being handed out to the processes again:
          if (!up->cpu_ticks) // the user's quota is used up, so none of that user's processes get new time; they wait;
          otherwise,
          oldcounter = p->counter;
          p->counter = (p->counter >> 1) + p->priority; // hand out each process's time just as the original Linux recalculation does; at the same time:
          up->cpu_ticks += oldcounter; // credit back time left over from sleeping processes so it is not lost
          up->cpu_ticks -= p->counter; // subtract the time just handed out
       b. Processes then run and switch under the original scheduling scheme until the condition for the new code is met again.


Here up->cpu_ticks acts like a bookkeeper: every user starts with the same quota, which is what makes the scheduling per-user, i.e. the processes owned by each user get the same total run time.
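
To watch the arithmetic in isolation, here is a small user-space simulation of that recalculation pass. sim_user, sim_task and recalc are made-up stand-ins for illustration; the move_tq_head() rotation and the per-user cpu_ticks refresh that follow in the real patch are left out:

#include <stdio.h>

#define DEF_PRIORITY 20

struct sim_user {
        long cpu_ticks;                         /* per-user quota, as in the patch */
};

struct sim_task {
        int counter;                            /* remaining time slice */
        int priority;                           /* static priority */
        struct sim_user *user;
};

/* One recalculation pass over all tasks: hand out fresh counters the usual
 * way, but charge them against the owner's cpu_ticks and stop once a user's
 * quota is gone. */
static void recalc(struct sim_task *tasks, int n)
{
        struct sim_task *ran_out = NULL;
        int i;

        for (i = 0; i < n; i++) {
                struct sim_task *p = &tasks[i];
                struct sim_user *up = p->user;
                long oldcounter;

                if (!up->cpu_ticks)
                        continue;               /* user out of quota: no new slice */
                oldcounter = p->counter;
                p->counter = (p->counter >> 1) + p->priority;
                up->cpu_ticks += oldcounter;    /* credit back unused slice time */
                up->cpu_ticks -= p->counter;    /* charge the slice just handed out */
                if (up->cpu_ticks <= 0 && ran_out == NULL) {
                        up->cpu_ticks = 0;
                        ran_out = p;            /* real patch rotates the task list here */
                }
        }
        (void)ran_out;                          /* move_tq_head(ran_out) omitted */
}

int main(void)
{
        struct sim_user heavy = { DEF_PRIORITY }, light = { DEF_PRIORITY };
        struct sim_task tasks[] = {
                { 0, DEF_PRIORITY, &heavy },    /* three tasks owned by "heavy" */
                { 0, DEF_PRIORITY, &heavy },
                { 0, DEF_PRIORITY, &heavy },
                { 0, DEF_PRIORITY, &light },    /* one task owned by "light" */
        };
        int i;

        recalc(tasks, 4);
        for (i = 0; i < 4; i++)
                printf("task %d: counter=%d\n", i, tasks[i].counter);
        return 0;
}

With the quotas above, only the first of heavy's three tasks gets a fresh counter in this pass, while light's single task gets its full share: exactly the per-user throttling described in point 3a.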



The above is only a rough description; to see what the specific code is really doing, it helps to walk through various scenarios.


It took me a good three days to understand these few lines. I was reading 《边干边学》, edited by 李善平, which explains this fairly clearly.
Posted on 2007-4-29 08:44:27
Does anyone have an electronic copy of 李善平's 《边干边学》, or any other material on this algorithm? Could you please send it to me? Thanks!
howema@126.com
Posted on 2007-4-30 21:07:37

OP, how did your work turn out? I'd like to do this too; could we compare notes?