Based on ideas of Ashwin Chaugule. Priority based swap token passing. Signed-off-by: Peter Zijlstra --- include/linux/sched.h | 6 +- include/linux/swap.h | 1 kernel/sysctl.c | 11 ---- mm/thrash.c | 125 ++++++++++++++++++++++---------------------------- 4 files changed, 58 insertions(+), 85 deletions(-) Index: linux-2.6/include/linux/sched.h =================================================================== --- linux-2.6.orig/include/linux/sched.h +++ linux-2.6/include/linux/sched.h @@ -342,9 +342,9 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; - /* Token based thrashing protection. */ - unsigned long swap_token_time; - char recent_pagein; + /* Swap token stuff */ + unsigned long swap_token_stamp; + int swap_token_prio; /* coredumping support */ int core_waiters; Index: linux-2.6/include/linux/swap.h =================================================================== --- linux-2.6.orig/include/linux/swap.h +++ linux-2.6/include/linux/swap.h @@ -259,7 +259,6 @@ extern spinlock_t swap_lock; /* linux/mm/thrash.c */ extern struct mm_struct * swap_token_mm; -extern unsigned long swap_token_default_timeout; extern void grab_swap_token(void); extern void __put_swap_token(struct mm_struct *); Index: linux-2.6/kernel/sysctl.c =================================================================== --- linux-2.6.orig/kernel/sysctl.c +++ linux-2.6/kernel/sysctl.c @@ -930,17 +930,6 @@ static ctl_table vm_table[] = { .extra1 = &zero, }, #endif -#ifdef CONFIG_SWAP - { - .ctl_name = VM_SWAP_TOKEN_TIMEOUT, - .procname = "swap_token_timeout", - .data = &swap_token_default_timeout, - .maxlen = sizeof(swap_token_default_timeout), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, -#endif #ifdef CONFIG_NUMA { .ctl_name = VM_ZONE_RECLAIM_MODE, Index: linux-2.6/mm/thrash.c =================================================================== --- linux-2.6.orig/mm/thrash.c +++ linux-2.6/mm/thrash.c @@ -14,93 +14,78 
@@ #include static DEFINE_SPINLOCK(swap_token_lock); -static unsigned long swap_token_timeout; -static unsigned long swap_token_check; struct mm_struct * swap_token_mm = &init_mm; +static unsigned long swap_token_time = 0; +static long swap_token_int = 5; -#define SWAP_TOKEN_CHECK_INTERVAL (HZ * 2) -#define SWAP_TOKEN_TIMEOUT (300 * HZ) /* - * Currently disabled; Needs further code to work at HZ * 300. - */ -unsigned long swap_token_default_timeout = SWAP_TOKEN_TIMEOUT; - -/* - * Take the token away if the process had no page faults - * in the last interval, or if it has held the token for - * too long. - */ -#define SWAP_TOKEN_ENOUGH_RSS 1 -#define SWAP_TOKEN_TIMED_OUT 2 -static int should_release_swap_token(struct mm_struct *mm) -{ - int ret = 0; - if (!mm->recent_pagein) - ret = SWAP_TOKEN_ENOUGH_RSS; - else if (time_after(jiffies, swap_token_timeout)) - ret = SWAP_TOKEN_TIMED_OUT; - mm->recent_pagein = 0; - return ret; -} - -/* - * Try to grab the swapout protection token. We only try to - * grab it once every TOKEN_CHECK_INTERVAL, both to prevent - * SMP lock contention and to check that the process that held - * the token before is no longer thrashing. + * grab_swap_token() - try to obtain the swap token + * + * Each attempt is seen as a swap token time event. + * + * Maintain the average grab interval, when a grab attempt is + * faster than the average the prio is increased. This inequality is formulated + * so that it tends to decrease the prio. + * + * However since it's the avg interval, someone will be faster. Hence someone + * will have a higher prio. + * + * When the attempting mm's priority is found higher than the current holder's + * priority, the holder yields the token. The new holder will get a prio boost. + * However because the normal priority adjustment slightly favours decrements + * it will normalise again. */ void grab_swap_token(void) { - struct mm_struct *mm; - int reason; + long curr_int; - /* We have the token. 
Let others know we still need it. */ - if (has_swap_token(current->mm)) { - current->mm->recent_pagein = 1; - if (unlikely(!swap_token_default_timeout)) - disable_swap_token(); + /* + * swap token time + */ + swap_token_time++; + + if (!spin_trylock(&swap_token_lock)) return; - } - if (time_after(jiffies, swap_token_check)) { + curr_int = swap_token_time - current->mm->swap_token_stamp; + current->mm->swap_token_stamp = swap_token_time; + + /* + * faster than average attempts gain priority + */ + if (curr_int < swap_token_int) + current->mm->swap_token_prio++; + else { + current->mm->swap_token_prio--; + if (unlikely(current->mm->swap_token_prio < 0)) + current->mm->swap_token_prio = 0; + } - if (!swap_token_default_timeout) { - swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL; - return; - } - - /* ... or if we recently held the token. */ - if (time_before(jiffies, current->mm->swap_token_time)) - return; - - if (!spin_trylock(&swap_token_lock)) - return; - - swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL; - - mm = swap_token_mm; - if ((reason = should_release_swap_token(mm))) { - unsigned long eligible = jiffies; - if (reason == SWAP_TOKEN_TIMED_OUT) { - eligible += swap_token_default_timeout; - } - mm->swap_token_time = eligible; - swap_token_timeout = jiffies + swap_token_default_timeout; - swap_token_mm = current->mm; - } - spin_unlock(&swap_token_lock); + /* + * adjust the avg interval + */ + swap_token_int += (curr_int - swap_token_int) / 2; + + /* + * yield token to higher priority contender + */ + if (current->mm->swap_token_prio > swap_token_mm->swap_token_prio) { + /* + * boost new holders prio to give a temp advantage + */ + current->mm->swap_token_prio += + current->mm->swap_token_prio >> 1; + swap_token_mm = current->mm; } - return; + + spin_unlock(&swap_token_lock); } /* Called on process exit. 
*/ void __put_swap_token(struct mm_struct *mm) { spin_lock(&swap_token_lock); - if (likely(mm == swap_token_mm)) { - mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL; + if (likely(mm == swap_token_mm)) swap_token_mm = &init_mm; - swap_token_check = jiffies; - } spin_unlock(&swap_token_lock); }