patch-2.4.23 linux-2.4.23/arch/x86_64/mm/fault.c

Next file: linux-2.4.23/arch/x86_64/mm/ioremap.c
Previous file: linux-2.4.23/arch/x86_64/mm/extable.c
Back to the patch index
Back to the overall index

diff -urN linux-2.4.22/arch/x86_64/mm/fault.c linux-2.4.23/arch/x86_64/mm/fault.c
@@ -94,6 +94,85 @@
 	printk("BAD\n");
 }
 
+/* Returns nonzero if the instruction at regs->rip, after skipping prefix
+   bytes, is a prefetch (0x0F 0x0D or 0x0F 0x18), so that callers can ignore
+   invalid CPU faults on it.  Opcode checker based on code by Richard Brunner */
+static int is_prefetch(struct pt_regs *regs, unsigned long addr)
+{ 
+	unsigned char *instr = (unsigned char *)(regs->rip);
+	int scan_more = 1;
+	int prefetch = 0; 
+	unsigned char *max_instr = instr + 15; /* prefix scan stops 15 bytes past rip */
+
+	/* Fault address is the instruction itself: reading it below would fault again */
+	if (regs->rip == addr)
+		return 0; 
+
+	if (regs->cs & (1<<2)) /* bit 2 of the code selector set (presumably an LDT segment): not scanned */
+		return 0;
+
+	while (scan_more && instr < max_instr) { 
+		unsigned char opcode;
+		unsigned char instr_hi;
+		unsigned char instr_lo;
+
+		if (__get_user(opcode, instr)) /* instruction byte not readable: stop scanning */
+			break; 
+
+		instr_hi = opcode & 0xf0; 
+		instr_lo = opcode & 0x0f; 
+		instr++;
+
+		switch (instr_hi) { 
+		case 0x20:
+		case 0x30:
+			/* Keep scanning only past 0x26,0x2E,0x36,0x3E, the
+			   valid x86 prefixes.  In long mode, the CPU will signal
+			   invalid opcode if some of these prefixes are
+			   present so we will never get here anyway */
+			scan_more = ((instr_lo & 7) == 0x6);
+			break;
+			
+		case 0x40:
+			/* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes.
+			   Need to figure out under what instruction mode the
+			   instruction was issued ... */
+			/* Could check the LDT for lm, but for now it's good
+			   enough to assume that long mode only uses well known
+			   segments (__USER_CS) or kernel (CPL 0). */
+			scan_more = ((regs->cs & 3) == 0) || (regs->cs == __USER_CS);
+			break;
+			
+		case 0x60:
+			/* 0x64 thru 0x67 are valid prefixes in all modes. */
+			scan_more = (instr_lo & 0xC) == 0x4;
+			break;		
+		case 0xF0:
+			/* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
+			scan_more = !instr_lo || (instr_lo>>1) == 1;
+			break;			
+		case 0x00:
+			/* Prefetch is 0x0F0D or 0x0F18; read the second byte, scanning ends either way */
+			scan_more = 0;
+			if (__get_user(opcode, instr)) 
+				break;
+			prefetch = (instr_lo == 0xF) &&
+				(opcode == 0x0D || opcode == 0x18);
+			break;			
+		default:
+			scan_more = 0; /* any other opcode byte ends the scan; not a prefetch */
+			break;
+		} 
+	}
+
+#if 0
+	if (prefetch)
+		printk("%s: prefetch caused page fault at %lx/%lx\n", current->comm,
+		       regs->rip, addr);
+#endif
+	return prefetch;
+}
+
 int page_fault_trace; 
 int exception_trace = 1;
 
@@ -156,7 +235,15 @@
 	 * context, we must not take the fault..
 	 */
 	if (in_interrupt() || !mm)
-		goto no_context;
+		goto bad_area_nosemaphore;
+
+	/* 
+	 * Work around K8 erratum #100. See the K8 specification update for 
+	 * details. Any code segment in LDT is compatibility mode.
+	 */
+	if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
+		(address >> 32))
+		return;
 
 again:
 	down_read(&mm->mmap_sem);
@@ -226,9 +313,11 @@
 	up_read(&mm->mmap_sem);
 
 bad_area_nosemaphore:
-
 	/* User mode accesses just cause a SIGSEGV */
 	if (error_code & 4) {
+		if (is_prefetch(regs, address))
+			return;
+
 		if (exception_trace && !(tsk->ptrace & PT_PTRACED) && 
 		    (tsk->sig->action[SIGSEGV-1].sa.sa_handler == SIG_IGN ||
 		    (tsk->sig->action[SIGSEGV-1].sa.sa_handler == SIG_DFL)))
@@ -260,40 +349,27 @@
 		return;
 	}
 
+	if (is_prefetch(regs, address))
+		return;
+
 /*
  * Oops. The kernel tried to access some bad page. We'll have to
  * terminate things with extreme prejudice.
  */
 
-	console_verbose();
-	bust_spinlocks(1); 
-
-	int cpu = safe_smp_processor_id(); 
 	unsigned long flags; 
-	extern int die_owner; 
-	extern spinlock_t die_lock;
-	
-	__save_flags(flags); 
-	__cli(); 
-	if (!spin_trylock(&die_lock)) { 
-		if (cpu == die_owner) 
-			/* nested oops. should stop eventually */;
-		else
-			spin_lock(&die_lock); 
-	}
-	die_owner = cpu; 
-
+	prepare_die(&flags);
 	if (address < PAGE_SIZE)
 		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request");
-	printk(" at virtual address %016lx\n",address);
-	printk(" printing rip:\n");
-	printk("%016lx\n", regs->rip);
+	printk(KERN_ALERT " at %016lx RIP: ", address); 
+	printk_address(regs->rip);
 	dump_pagetable(address);
-	die("Oops", regs, error_code);
-	bust_spinlocks(0); 
-	spin_unlock_irqrestore(&die_lock, flags);
+	__die("Oops", regs, error_code);
+	/* Executive summary in case the oops scrolled away */
+	printk(KERN_EMERG "CR2: %016lx\n", address);
+	exit_die(flags);
 	do_exit(SIGKILL);
 
 /*
@@ -315,10 +391,13 @@
 do_sigbus:
 	up_read(&mm->mmap_sem);
 
-	/*
-	 * Send a sigbus, regardless of whether we were in kernel
-	 * or user mode.
-	 */
+	/* Kernel mode? Handle exceptions or die */
+	if (!(error_code & 4))
+		goto no_context;
+		
+	if (is_prefetch(regs, address))
+		return;
+
 	tsk->thread.cr2 = address;
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 14;
@@ -327,10 +406,6 @@
 	info.si_code = BUS_ADRERR;
 	info.si_addr = (void *)address;
 	force_sig_info(SIGBUS, &info, tsk);
-
-	/* Kernel mode? Handle exceptions or die */
-	if (!(error_code & 4))
-		goto no_context;
 	return;
 
 

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)