patch-2.3.23 linux/mm/page_alloc.c

diff -u --recursive --new-file v2.3.22/linux/mm/page_alloc.c linux/mm/page_alloc.c
@@ -14,7 +14,8 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
-#include <linux/bigmem.h> /* export bigmem vars */
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
 
 #include <asm/dma.h>
 #include <asm/uaccess.h> /* for copy_to/from_user */
@@ -40,45 +41,25 @@
 #define NR_MEM_LISTS 10
 #endif
 
-/* The start of this MUST match the start of "struct page" */
 struct free_area_struct {
-	struct page *next;
-	struct page *prev;
+	struct list_head free_list;
 	unsigned int * map;
+	unsigned long count;
 };
 
-#define memory_head(x) ((struct page *)(x))
+#define MEM_TYPE_DMA		0
+#define MEM_TYPE_NORMAL		1
+#define MEM_TYPE_HIGH		2
 
-#ifdef CONFIG_BIGMEM
-#define BIGMEM_LISTS_OFFSET	NR_MEM_LISTS
-static struct free_area_struct free_area[NR_MEM_LISTS*2];
+static const char *mem_type_strs[] = {"DMA", "Normal", "High"};
+
+#ifdef CONFIG_HIGHMEM
+#define NR_MEM_TYPES		3
 #else
-static struct free_area_struct free_area[NR_MEM_LISTS];
+#define NR_MEM_TYPES		2
 #endif
 
-static inline void init_mem_queue(struct free_area_struct * head)
-{
-	head->next = memory_head(head);
-	head->prev = memory_head(head);
-}
-
-static inline void add_mem_queue(struct free_area_struct * head, struct page * entry)
-{
-	struct page * next = head->next;
-
-	entry->prev = memory_head(head);
-	entry->next = next;
-	next->prev = entry;
-	head->next = entry;
-}
-
-static inline void remove_mem_queue(struct page * entry)
-{
-	struct page * next = entry->next;
-	struct page * prev = entry->prev;
-	next->prev = prev;
-	prev->next = next;
-}
+static struct free_area_struct free_area[NR_MEM_TYPES][NR_MEM_LISTS];
 
 /*
  * Free_page() adds the page to the free lists. This is optimized for
@@ -99,41 +80,76 @@
  */
 spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED;
 
-static inline void free_pages_ok(unsigned long map_nr, unsigned long order)
+#define memlist_init(x) INIT_LIST_HEAD(x)
+#define memlist_add_head list_add
+#define memlist_add_tail list_add_tail
+#define memlist_del list_del
+#define memlist_entry list_entry
+#define memlist_next(x) ((x)->next)
+#define memlist_prev(x) ((x)->prev)
+
+static inline void free_pages_ok(struct page *page, unsigned long map_nr, unsigned long order)
 {
-	struct free_area_struct *area = free_area + order;
+	struct free_area_struct *area;
 	unsigned long index = map_nr >> (1 + order);
 	unsigned long mask = (~0UL) << order;
 	unsigned long flags;
+	struct page *buddy;
 
 	spin_lock_irqsave(&page_alloc_lock, flags);
 
 #define list(x) (mem_map+(x))
 
-#ifdef CONFIG_BIGMEM
-	if (map_nr >= bigmem_mapnr) {
-		area += BIGMEM_LISTS_OFFSET;
-		nr_free_bigpages -= mask;
-	}
+#ifdef CONFIG_HIGHMEM
+	if (map_nr >= highmem_mapnr) {
+		area = free_area[MEM_TYPE_HIGH];
+		nr_free_highpages -= mask;
+	} else
 #endif
+	if (PageDMA(page))
+		area = free_area[MEM_TYPE_DMA];
+	else
+		area = free_area[MEM_TYPE_NORMAL];
+
+	area += order;
+
 	map_nr &= mask;
 	nr_free_pages -= mask;
+
 	while (mask + (1 << (NR_MEM_LISTS-1))) {
 		if (!test_and_change_bit(index, area->map))
+			/*
+			 * the buddy page is still allocated.
+			 */
 			break;
-		remove_mem_queue(list(map_nr ^ -mask));
+		/*
+		 * Move the buddy up one level.
+		 */
+		buddy = list(map_nr ^ -mask);
+		page = list(map_nr);
+
+		area->count--;
+		memlist_del(&buddy->list);
 		mask <<= 1;
 		area++;
 		index >>= 1;
 		map_nr &= mask;
 	}
-	add_mem_queue(area, list(map_nr));
-
+	area->count++;
+	memlist_add_head(&(list(map_nr))->list, &area->free_list);
 #undef list
 
 	spin_unlock_irqrestore(&page_alloc_lock, flags);
 }
 
+/*
+ * Some ugly macros to speed up __get_free_pages()..
+ */
+#define MARK_USED(index, order, area) \
+	change_bit((index) >> (1+(order)), (area)->map)
+#define CAN_DMA(x) (PageDMA(x))
+#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
+
 int __free_page(struct page *page)
 {
 	if (!PageReserved(page) && put_page_testzero(page)) {
@@ -142,7 +158,7 @@
 		if (PageLocked(page))
 			PAGE_BUG(page);
 
-		free_pages_ok(page - mem_map, 0);
+		free_pages_ok(page, page-mem_map, 0);
 		return 1;
 	}
 	return 0;
@@ -159,154 +175,155 @@
 				PAGE_BUG(map);
 			if (PageLocked(map))
 				PAGE_BUG(map);
-			free_pages_ok(map_nr, order);
+			free_pages_ok(map, map_nr, order);
 			return 1;
 		}
 	}
 	return 0;
 }
 
-/*
- * Some ugly macros to speed up __get_free_pages()..
- */
-#define MARK_USED(index, order, area) \
-	change_bit((index) >> (1+(order)), (area)->map)
-#define CAN_DMA(x) (PageDMA(x))
-#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
+static inline unsigned long EXPAND (struct page *map, unsigned long index,
+		 int low, int high, struct free_area_struct * area)
+{
+	unsigned long size = 1 << high;
 
-#ifdef CONFIG_BIGMEM
-#define RMQUEUEBIG(order, gfp_mask) \
-if (gfp_mask & __GFP_BIGMEM) { \
-	struct free_area_struct * area = free_area+order+BIGMEM_LISTS_OFFSET; \
-	unsigned long new_order = order; \
-	do { struct page *prev = memory_head(area), *ret = prev->next; \
-		if (memory_head(area) != ret) { \
-			unsigned long map_nr; \
-			(prev->next = ret->next)->prev = prev; \
-			map_nr = ret - mem_map; \
-			MARK_USED(map_nr, new_order, area); \
-			nr_free_pages -= 1 << order; \
-			nr_free_bigpages -= 1 << order; \
-			EXPAND(ret, map_nr, order, new_order, area); \
-			spin_unlock_irqrestore(&page_alloc_lock, flags); \
-			return ADDRESS(map_nr); \
-		} \
-		new_order++; area++; \
-	} while (new_order < NR_MEM_LISTS); \
+	while (high > low) {
+		area--;
+		high--;
+		size >>= 1;
+		area->count++;
+		memlist_add_head(&(map)->list, &(area)->free_list);
+		MARK_USED(index, high, area);
+		index += size;
+		map += size;
+	}
+	set_page_count(map, 1);
+	return index;
 }
-#endif
-
-#define RMQUEUE(order, gfp_mask) \
-do { struct free_area_struct * area = free_area+order; \
-     unsigned long new_order = order; \
-	do { struct page *prev = memory_head(area), *ret = prev->next; \
-		while (memory_head(area) != ret) { \
-			if (!(gfp_mask & __GFP_DMA) || CAN_DMA(ret)) { \
-				unsigned long map_nr; \
-				(prev->next = ret->next)->prev = prev; \
-				map_nr = ret - mem_map; \
-				MARK_USED(map_nr, new_order, area); \
-				nr_free_pages -= 1 << order; \
-				EXPAND(ret, map_nr, order, new_order, area); \
-				spin_unlock_irqrestore(&page_alloc_lock,flags);\
-				return ADDRESS(map_nr); \
-			} \
-			prev = ret; \
-			ret = ret->next; \
-		} \
-		new_order++; area++; \
-	} while (new_order < NR_MEM_LISTS); \
-} while (0)
-
-#define EXPAND(map,index,low,high,area) \
-do { unsigned long size = 1 << high; \
-	while (high > low) { \
-		area--; high--; size >>= 1; \
-		add_mem_queue(area, map); \
-		MARK_USED(index, high, area); \
-		index += size; \
-		map += size; \
-	} \
-	set_page_count(map, 1); \
-} while (0)
 
-unsigned long __get_free_pages(int gfp_mask, unsigned long order)
+static inline struct page * rmqueue (int order, unsigned type)
 {
-	unsigned long flags;
+	struct free_area_struct * area = free_area[type]+order;
+	unsigned long curr_order = order, map_nr;
+	struct page *page;
+	struct list_head *head, *curr;
 
-	if (order >= NR_MEM_LISTS)
-		goto nopage;
+	do {
+		head = &area->free_list;
+		curr = memlist_next(head);
 
-#ifdef ATOMIC_MEMORY_DEBUGGING
-	if ((gfp_mask & __GFP_WAIT) && in_interrupt()) {
-		static int count = 0;
-		if (++count < 5) {
-			printk("gfp called nonatomically from interrupt %p\n",
-				__builtin_return_address(0));
+		if (curr != head) {
+			page = memlist_entry(curr, struct page, list);
+			memlist_del(curr);
+			area->count--;
+			map_nr = page - mem_map;	
+			MARK_USED(map_nr, curr_order, area);
+			nr_free_pages -= 1 << order;
+			map_nr = EXPAND(page, map_nr, order, curr_order, area);
+			page = mem_map + map_nr;
+			return page;	
+		}
+		curr_order++;
+		area++;
+	} while (curr_order < NR_MEM_LISTS);
+
+	return NULL;
+}
+
+static inline int balance_lowmemory (int gfp_mask)
+{
+	int freed;
+	static int low_on_memory = 0;
+
+#ifndef CONFIG_HIGHMEM
+	if (nr_free_pages > freepages.min) {
+		if (!low_on_memory)
+			return 1;
+		if (nr_free_pages >= freepages.high) {
+			low_on_memory = 0;
+			return 1;
 		}
-		goto nopage;
 	}
-#endif
 
-	/*
-	 * If this is a recursive call, we'd better
-	 * do our best to just allocate things without
-	 * further thought.
-	 */
-	if (!(current->flags & PF_MEMALLOC)) {
-		int freed;
-		static int low_on_memory = 0;
+	low_on_memory = 1;
+#else
+	static int low_on_highmemory = 0;
 
-#ifndef CONFIG_BIGMEM
+	if (gfp_mask & __GFP_HIGHMEM)
+	{
 		if (nr_free_pages > freepages.min) {
-			if (!low_on_memory)
-				goto ok_to_allocate;
+			if (!low_on_highmemory) {
+				return 1;
+			}
 			if (nr_free_pages >= freepages.high) {
-				low_on_memory = 0;
-				goto ok_to_allocate;
+				low_on_highmemory = 0;
+				return 1;
 			}
 		}
-
-		low_on_memory = 1;
-#else
-		static int low_on_bigmemory = 0;
-
-		if (gfp_mask & __GFP_BIGMEM)
-		{
-			if (nr_free_pages > freepages.min) {
-				if (!low_on_bigmemory)
-					goto ok_to_allocate;
-				if (nr_free_pages >= freepages.high) {
-					low_on_bigmemory = 0;
-					goto ok_to_allocate;
-				}
+		low_on_highmemory = 1;
+	} else {
+		if (nr_free_pages+nr_free_highpages > freepages.min) {
+			if (!low_on_memory) {
+				return 1;
 			}
-			low_on_bigmemory = 1;
-		} else {
-			if (nr_free_pages-nr_free_bigpages > freepages.min) {
-				if (!low_on_memory)
-					goto ok_to_allocate;
-				if (nr_free_pages-nr_free_bigpages >= freepages.high) {
-					low_on_memory = 0;
-					goto ok_to_allocate;
-				}
+			if (nr_free_pages+nr_free_highpages >= freepages.high) {
+				low_on_memory = 0;
+				return 1;
 			}
-			low_on_memory = 1;
 		}
+		low_on_memory = 1;
+	}
 #endif
-		current->flags |= PF_MEMALLOC;
-		freed = try_to_free_pages(gfp_mask);
-		current->flags &= ~PF_MEMALLOC;
+	current->flags |= PF_MEMALLOC;
+	freed = try_to_free_pages(gfp_mask);
+	current->flags &= ~PF_MEMALLOC;
+
+	if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
+		return 0;
+	return 1;
+}
+
+struct page * __get_pages(int gfp_mask, unsigned long order)
+{
+	unsigned long flags;
+	struct page *page;
+	unsigned type;
+
+	if (order >= NR_MEM_LISTS)
+		goto nopage;
+
+	/*
+	 * If anyone calls gfp from interrupts nonatomically then it
+	 * will sooner or later tripped up by a schedule().
+	 */
+
+	/*
+	 * If this is a recursive call, we'd better
+	 * do our best to just allocate things without
+	 * further thought.
+	 */
+	if (!(current->flags & PF_MEMALLOC))
+		goto lowmemory;
 
-		if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
-			goto nopage;
-	}
 ok_to_allocate:
-	spin_lock_irqsave(&page_alloc_lock, flags);
-#ifdef CONFIG_BIGMEM
-	RMQUEUEBIG(order, gfp_mask);
+#ifdef CONFIG_HIGHMEM
+	if (gfp_mask & __GFP_HIGHMEM)
+		type = MEM_TYPE_HIGH;
+	else
 #endif
-	RMQUEUE(order, gfp_mask);
+	if (gfp_mask & __GFP_DMA)
+		type = MEM_TYPE_DMA;
+	else
+		type = MEM_TYPE_NORMAL;
+
+	spin_lock_irqsave(&page_alloc_lock, flags);
+	do {
+		page = rmqueue(order, type);
+		if (page) {
+			spin_unlock_irqrestore(&page_alloc_lock, flags);
+			return page;
+		}
+	} while (type-- > 0) ;
 	spin_unlock_irqrestore(&page_alloc_lock, flags);
 
 	/*
@@ -320,7 +337,26 @@
 	}
 
 nopage:
-	return 0;
+	return NULL;
+
+lowmemory:
+	if (balance_lowmemory(gfp_mask))
+		goto ok_to_allocate;
+	goto nopage;
+}
+
+unsigned long __get_free_pages(int gfp_mask, unsigned long order)
+{
+	struct page *page;
+	page = __get_pages(gfp_mask, order);
+	if (!page)
+		return 0;
+	return page_address(page);
+}
+
+struct page * get_free_highpage(int gfp_mask)
+{
+	return __get_pages(gfp_mask, 0);
 }
 
 /*
@@ -331,36 +367,32 @@
 void show_free_areas(void)
 {
  	unsigned long order, flags;
- 	unsigned long total = 0;
+	unsigned type;
 
-	printk("Free pages:      %6dkB (%6dkB BigMem)\n ( ",
+	spin_lock_irqsave(&page_alloc_lock, flags);
+	printk("Free pages:      %6dkB (%6ldkB HighMem)\n",
 		nr_free_pages<<(PAGE_SHIFT-10),
-		nr_free_bigpages<<(PAGE_SHIFT-10));
-	printk("Free: %d, lru_cache: %d (%d %d %d)\n",
+		nr_free_highpages<<(PAGE_SHIFT-10));
+	printk("( Free: %d, lru_cache: %d (%d %d %d) )\n",
 		nr_free_pages,
 		nr_lru_pages,
 		freepages.min,
 		freepages.low,
 		freepages.high);
-	spin_lock_irqsave(&page_alloc_lock, flags);
- 	for (order=0 ; order < NR_MEM_LISTS; order++) {
-		struct page * tmp;
-		unsigned long nr = 0;
-		for (tmp = free_area[order].next ; tmp != memory_head(free_area+order) ; tmp = tmp->next) {
-			nr ++;
-		}
-#ifdef CONFIG_BIGMEM
-		for (tmp = free_area[BIGMEM_LISTS_OFFSET+order].next;
-		     tmp != memory_head(free_area+BIGMEM_LISTS_OFFSET+order);
-		     tmp = tmp->next) {
-			nr ++;
+
+	for (type = 0; type < NR_MEM_TYPES; type++) {
+ 		unsigned long total = 0;
+		printk("  %s: ", mem_type_strs[type]);
+	 	for (order = 0; order < NR_MEM_LISTS; order++) {
+			unsigned long nr = free_area[type][order].count;
+
+			total += nr * ((PAGE_SIZE>>10) << order);
+			printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order));
 		}
-#endif
-		total += nr * ((PAGE_SIZE>>10) << order);
-		printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order));
+		printk("= %lukB)\n", total);
 	}
 	spin_unlock_irqrestore(&page_alloc_lock, flags);
-	printk("= %lukB)\n", total);
+
 #ifdef SWAP_CACHE_INFO
 	show_swap_cache_info();
 #endif	
@@ -374,11 +406,12 @@
  *   - mark all memory queues empty
  *   - clear the memory bitmaps
  */
-unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_mem)
+volatile int data;
+void __init free_area_init(unsigned long end_mem_pages)
 {
 	mem_map_t * p;
-	unsigned long mask = PAGE_MASK;
-	unsigned long i;
+	unsigned long i, j;
+	unsigned long map_size;
 
 	/*
 	 * Select nr of pages we try to keep free for important stuff
@@ -387,7 +420,7 @@
 	 * This is fairly arbitrary, but based on some behaviour
 	 * analysis.
 	 */
-	i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
+	i = end_mem_pages >> 7;
 	if (i < 10)
 		i = 10;
 	if (i > 256)
@@ -395,36 +428,43 @@
 	freepages.min = i;
 	freepages.low = i * 2;
 	freepages.high = i * 3;
-	mem_map = (mem_map_t *) LONG_ALIGN(start_mem);
-	p = mem_map + MAP_NR(end_mem);
-	start_mem = LONG_ALIGN((unsigned long) p);
-	memset(mem_map, 0, start_mem - (unsigned long) mem_map);
-	do {
-		--p;
+
+	/*
+	 * Most architectures just pick 'start_mem'. Some architectures
+	 * (with lots of mem and discontinous memory maps) have to search
+	 * for a good area.
+	 */
+	map_size = end_mem_pages*sizeof(struct page);
+	mem_map = (struct page *) alloc_bootmem(map_size);
+	memset(mem_map, 0, map_size);
+
+	/*
+	 * Initially all pages are reserved - free ones are freed
+	 * up by free_all_bootmem() once the early boot process is
+	 * done.
+	 */
+	for (p = mem_map; p < mem_map + end_mem_pages; p++) {
 		set_page_count(p, 0);
-		p->flags = (1 << PG_DMA) | (1 << PG_reserved);
+		p->flags = (1 << PG_DMA);
+		SetPageReserved(p);
 		init_waitqueue_head(&p->wait);
-	} while (p > mem_map);
-
-	for (i = 0 ; i < NR_MEM_LISTS ; i++) {
-		unsigned long bitmap_size;
-		init_mem_queue(free_area+i);
-#ifdef CONFIG_BIGMEM
-		init_mem_queue(free_area+BIGMEM_LISTS_OFFSET+i);
-#endif
-		mask += mask;
-		end_mem = (end_mem + ~mask) & mask;
-		bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
-		bitmap_size = (bitmap_size + 7) >> 3;
-		bitmap_size = LONG_ALIGN(bitmap_size);
-		free_area[i].map = (unsigned int *) start_mem;
-		memset((void *) start_mem, 0, bitmap_size);
-		start_mem += bitmap_size;
-#ifdef CONFIG_BIGMEM
-		free_area[BIGMEM_LISTS_OFFSET+i].map = (unsigned int *) start_mem;
-		memset((void *) start_mem, 0, bitmap_size);
-		start_mem += bitmap_size;
-#endif
+		memlist_init(&p->list);
+	}
+	
+	for (j = 0 ; j < NR_MEM_TYPES ; j++) {
+		unsigned long mask = -1;
+		for (i = 0 ; i < NR_MEM_LISTS ; i++) {
+			unsigned long bitmap_size;
+			unsigned int * map;
+			memlist_init(&free_area[j][i].free_list);
+			mask += mask;
+			end_mem_pages = (end_mem_pages + ~mask) & mask;
+			bitmap_size = end_mem_pages >> i;
+			bitmap_size = (bitmap_size + 7) >> 3;
+			bitmap_size = LONG_ALIGN(bitmap_size);
+			map = (unsigned int *) alloc_bootmem(bitmap_size);
+			free_area[j][i].map = map;
+			memset((void *) map, 0, bitmap_size);
+		}
 	}
-	return start_mem;
 }
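
To make the new allocation path easier to follow, here is a minimal user-space sketch (not part of the patch) of the order-splitting bookkeeping that rmqueue() and EXPAND() perform above: take a block from the smallest non-empty order at or above the request, then push the surplus halves back onto the lower-order lists. It only models the per-order free counts; MAX_ORDER, free_count[] and alloc_order() are names invented for this illustration.

	/*
	 * Illustrative sketch only: a tiny user-space model of the
	 * rmqueue()/EXPAND() split bookkeeping. It tracks nothing but
	 * per-order free-block counts.
	 */
	#include <stdio.h>

	#define MAX_ORDER 10			/* plays the role of NR_MEM_LISTS */

	static unsigned long free_count[MAX_ORDER];	/* like free_area[type][order].count */

	/*
	 * Allocate a block of 2^order pages: find the smallest non-empty
	 * order at or above the request, then split the surplus halves
	 * back onto the lower orders, as rmqueue() hands the block to EXPAND().
	 */
	static long alloc_order(int order)
	{
		int cur;

		for (cur = order; cur < MAX_ORDER; cur++) {
			if (!free_count[cur])
				continue;
			free_count[cur]--;	/* memlist_del() + area->count-- */
			while (cur > order) {	/* the EXPAND() loop */
				cur--;
				free_count[cur]++;	/* upper half goes back on a lower-order list */
			}
			return 1L << order;	/* pages handed to the caller */
		}
		return 0;			/* nothing large enough: rmqueue() returns NULL */
	}

	int main(void)
	{
		int order;

		free_count[MAX_ORDER - 1] = 1;	/* one maximal free block, as after boot */

		printf("allocated %ld pages\n", alloc_order(3));
		for (order = 0; order < MAX_ORDER; order++)
			printf("order %d: %lu free blocks\n", order, free_count[order]);
		return 0;
	}

The real code additionally keeps the split-off halves on free_area[type][order].free_list and flips the buddy bitmaps via MARK_USED(); the counts modelled here correspond to the new free_area_struct.count field that show_free_areas() now prints, and free_pages_ok() walks the same levels in the opposite direction when it coalesces a freed block with its buddy.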
