51 files changed, 14858 insertions, 473 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 11b90434d..f96f04b15 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -5,7 +5,9 @@ set(SRCS    core.cpp
             mem_map_funcs.cpp
             system.cpp
             arm/disassembler/arm_disasm.cpp
+            arm/disassembler/load_symbol_map.cpp
             arm/interpreter/arm_interpreter.cpp
+            arm/interpreter/armcopro.cpp
             arm/interpreter/armemu.cpp
             arm/interpreter/arminit.cpp
             arm/interpreter/armmmu.cpp
@@ -13,12 +15,24 @@ set(SRCS    core.cpp
             arm/interpreter/armsupp.cpp
             arm/interpreter/armvirt.cpp
             arm/interpreter/thumbemu.cpp
-            arm/mmu/arm1176jzf_s_mmu.cpp
+            arm/interpreter/vfp/vfp.cpp
+            arm/interpreter/vfp/vfpdouble.cpp
+            arm/interpreter/vfp/vfpinstr.cpp
+            arm/interpreter/vfp/vfpsingle.cpp
+            arm/interpreter/mmu/arm1176jzf_s_mmu.cpp
+            arm/interpreter/mmu/cache.cpp
+            arm/interpreter/mmu/maverick.cpp
+            arm/interpreter/mmu/rb.cpp
+            arm/interpreter/mmu/sa_mmu.cpp
+            arm/interpreter/mmu/tlb.cpp
+            arm/interpreter/mmu/wb.cpp
+            arm/interpreter/mmu/xscale_copro.cpp
             elf/elf_reader.cpp
             file_sys/directory_file_system.cpp
             file_sys/meta_file_system.cpp
             hle/hle.cpp
-            hle/mrc.cpp
+            hle/config_mem.cpp
+            hle/coprocessor.cpp
             hle/syscall.cpp
             hle/service/apt.cpp
             hle/service/gsp.cpp
diff --git a/src/core/arm/disassembler/load_symbol_map.cpp b/src/core/arm/disassembler/load_symbol_map.cpp
new file mode 100644
index 000000000..d7fc0a042
--- /dev/null
+++ b/src/core/arm/disassembler/load_symbol_map.cpp
@@ -0,0 +1,33 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include <string>
+#include <vector>
+
+#include "common/symbols.h"
+#include "common/common_types.h"
+#include "common/file_util.h"
+
+#include "core/arm/disassembler/load_symbol_map.h"
+
+/*
+ * Loads a symbol map file for use with the disassembler
+ * @param filename String filename path of symbol map file
+ */
+void LoadSymbolMap(std::string filename) {
+    std::ifstream infile(filename);
+
+    std::string address_str, function_name, line;
+    u32 size, address;
+
+    while (std::getline(infile, line)) {
+        std::istringstream iss(line);
+        if (!(iss >> address_str >> size >> function_name)) { 
+            break; // Error parsing 
+        }
+        u32 address = std::stoul(address_str, nullptr, 16);
+
+        Symbols::Add(address, function_name, size, 2);
+    }
+}
diff --git a/src/core/arm/disassembler/load_symbol_map.h b/src/core/arm/disassembler/load_symbol_map.h
new file mode 100644
index 000000000..837cca99b
--- /dev/null
+++ b/src/core/arm/disassembler/load_symbol_map.h
@@ -0,0 +1,13 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+/*
+ * Loads a symbol map file for use with the disassembler
+ * @param filename String filename path of symbol map file
+ */
+void LoadSymbolMap(std::string filename);
diff --git a/src/core/arm/interpreter/arm_interpreter.cpp b/src/core/arm/interpreter/arm_interpreter.cpp
index 4045779d7..4652803de 100644
--- a/src/core/arm/interpreter/arm_interpreter.cpp
+++ b/src/core/arm/interpreter/arm_interpreter.cpp
@@ -2,7 +2,7 @@
 // Licensed under GPLv2
 // Refer to the license.txt file included.  
 
-#include "arm_interpreter.h"
+#include "core/arm/interpreter/arm_interpreter.h"
 
 const static cpu_config_t s_arm11_cpu_info = {
     "armv6", "arm11", 0x0007b000, 0x0007f000, NONCACHE
diff --git a/src/core/arm/interpreter/armcopro.cpp b/src/core/arm/interpreter/armcopro.cpp
new file mode 100644
index 000000000..6a75e6601
--- /dev/null
+++ b/src/core/arm/interpreter/armcopro.cpp
@@ -0,0 +1,842 @@
+/*  armcopro.c -- co-processor interface:  ARM6 Instruction Emulator.
+    Copyright (C) 1994, 2000 Advanced RISC Machines Ltd.
+ 
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+ 
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+ 
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+
+#include "core/arm/interpreter/armdefs.h"
+#include "core/arm/interpreter/armos.h"
+#include "core/arm/interpreter/armemu.h"
+#include "core/arm/interpreter/vfp/vfp.h"
+
+//chy 2005-07-08
+//#include "ansidecl.h"
+//chy -------
+//#include "iwmmxt.h"
+
+
+//chy 2005-09-19 add CP6 MRC support (for get irq number and base)
+extern unsigned xscale_cp6_mrc (ARMul_State * state, unsigned type,
+				ARMword instr, ARMword * data);
+//chy 2005-09-19---------------
+
+extern unsigned xscale_cp13_init (ARMul_State * state);
+extern unsigned xscale_cp13_exit (ARMul_State * state);
+extern unsigned xscale_cp13_ldc (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword data);
+extern unsigned xscale_cp13_stc (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword * data);
+extern unsigned xscale_cp13_mrc (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword * data);
+extern unsigned xscale_cp13_mcr (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword data);
+extern unsigned xscale_cp13_cdp (ARMul_State * state, unsigned type,
+				 ARMword instr);
+extern unsigned xscale_cp13_read_reg (ARMul_State * state, unsigned reg,
+				      ARMword * data);
+extern unsigned xscale_cp13_write_reg (ARMul_State * state, unsigned reg,
+				       ARMword data);
+extern unsigned xscale_cp14_init (ARMul_State * state);
+extern unsigned xscale_cp14_exit (ARMul_State * state);
+extern unsigned xscale_cp14_ldc (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword data);
+extern unsigned xscale_cp14_stc (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword * data);
+extern unsigned xscale_cp14_mrc (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword * data);
+extern unsigned xscale_cp14_mcr (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword data);
+extern unsigned xscale_cp14_cdp (ARMul_State * state, unsigned type,
+				 ARMword instr);
+extern unsigned xscale_cp14_read_reg (ARMul_State * state, unsigned reg,
+				      ARMword * data);
+extern unsigned xscale_cp14_write_reg (ARMul_State * state, unsigned reg,
+				       ARMword data);
+extern unsigned xscale_cp15_init (ARMul_State * state);
+extern unsigned xscale_cp15_exit (ARMul_State * state);
+extern unsigned xscale_cp15_ldc (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword data);
+extern unsigned xscale_cp15_stc (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword * data);
+extern unsigned xscale_cp15_mrc (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword * data);
+extern unsigned xscale_cp15_mcr (ARMul_State * state, unsigned type,
+				 ARMword instr, ARMword data);
+extern unsigned xscale_cp15_cdp (ARMul_State * state, unsigned type,
+				 ARMword instr);
+extern unsigned xscale_cp15_read_reg (ARMul_State * state, unsigned reg,
+				      ARMword * data);
+extern unsigned xscale_cp15_write_reg (ARMul_State * state, unsigned reg,
+				       ARMword data);
+extern unsigned xscale_cp15_cp_access_allowed (ARMul_State * state, unsigned reg,
+					unsigned cpnum);
+
+/* Dummy Co-processors.  */
+
+static unsigned
+NoCoPro3R (ARMul_State * state,
+	   unsigned a, ARMword b)
+{
+	return ARMul_CANT;
+}
+
+static unsigned
+NoCoPro4R (ARMul_State * state,
+	   unsigned a,
+	   ARMword b, ARMword c)
+{
+	return ARMul_CANT;
+}
+
+static unsigned
+NoCoPro4W (ARMul_State * state,
+	   unsigned a,
+	   ARMword b, ARMword * c)
+{
+	return ARMul_CANT;
+}
+
+static unsigned
+NoCoPro5R (ARMul_State * state,
+	   unsigned a,
+	   ARMword b, 
+	   ARMword c, ARMword d)
+{
+	return ARMul_CANT;
+}
+
+static unsigned
+NoCoPro5W (ARMul_State * state,
+	   unsigned a,
+	   ARMword b,
+	   ARMword * c, ARMword * d )
+{
+	return ARMul_CANT;
+}
+
+/* The XScale Co-processors.  */
+
+/* Coprocessor 15:  System Control.  */
+static void write_cp14_reg (unsigned, ARMword);
+static ARMword read_cp14_reg (unsigned);
+
+/* There are two sets of registers for copro 15.
+   One set is available when opcode_2 is 0 and
+   the other set when opcode_2 >= 1.  */
+static ARMword XScale_cp15_opcode_2_is_0_Regs[16];
+static ARMword XScale_cp15_opcode_2_is_not_0_Regs[16];
+/* There are also a set of breakpoint registers
+   which are accessed via CRm instead of opcode_2.  */
+static ARMword XScale_cp15_DBR1;
+static ARMword XScale_cp15_DBCON;
+static ARMword XScale_cp15_IBCR0;
+static ARMword XScale_cp15_IBCR1;
+
+static unsigned
+XScale_cp15_init (ARMul_State * state)
+{
+	int i;
+
+	for (i = 16; i--;) {
+		XScale_cp15_opcode_2_is_0_Regs[i] = 0;
+		XScale_cp15_opcode_2_is_not_0_Regs[i] = 0;
+	}
+
+	/* Initialise the processor ID.  */
+	//chy 2003-03-24, is same as cpu id in skyeye_options.c
+	//XScale_cp15_opcode_2_is_0_Regs[0] = 0x69052000;
+	XScale_cp15_opcode_2_is_0_Regs[0] = 0x69050000;
+
+	/* Initialise the cache type.  */
+	XScale_cp15_opcode_2_is_not_0_Regs[0] = 0x0B1AA1AA;
+
+	/* Initialise the ARM Control Register.  */
+	XScale_cp15_opcode_2_is_0_Regs[1] = 0x00000078;
+
+	return No_exp;
+}
+
+/* Check an access to a register.  */
+
+static unsigned
+check_cp15_access (ARMul_State * state,
+		   unsigned reg,
+		   unsigned CRm, unsigned opcode_1, unsigned opcode_2)
+{
+	/* Do not allow access to these register in USER mode.  */
+	//chy 2006-02-16 , should not consider system mode, don't conside 26bit mode
+	if (state->Mode == USER26MODE || state->Mode == USER32MODE )
+		return ARMul_CANT;
+
+	/* Opcode_1should be zero.  */
+	if (opcode_1 != 0)
+		return ARMul_CANT;
+
+	/* Different register have different access requirements.  */
+	switch (reg) {
+	case 0:
+	case 1:
+		/* CRm must be 0.  Opcode_2 can be anything.  */
+		if (CRm != 0)
+			return ARMul_CANT;
+		break;
+	case 2:
+	case 3:
+		/* CRm must be 0.  Opcode_2 must be zero.  */
+		if ((CRm != 0) || (opcode_2 != 0))
+			return ARMul_CANT;
+		break;
+	case 4:
+		/* Access not allowed.  */
+		return ARMul_CANT;
+	case 5:
+	case 6:
+		/* Opcode_2 must be zero.  CRm must be 0.  */
+		if ((CRm != 0) || (opcode_2 != 0))
+			return ARMul_CANT;
+		break;
+	case 7:
+		/* Permissable combinations:
+		   Opcode_2  CRm
+		   0       5
+		   0       6
+		   0       7
+		   1       5
+		   1       6
+		   1      10
+		   4      10
+		   5       2
+		   6       5  */
+		switch (opcode_2) {
+		default:
+			return ARMul_CANT;
+		case 6:
+			if (CRm != 5)
+				return ARMul_CANT;
+			break;
+		case 5:
+			if (CRm != 2)
+				return ARMul_CANT;
+			break;
+		case 4:
+			if (CRm != 10)
+				return ARMul_CANT;
+			break;
+		case 1:
+			if ((CRm != 5) && (CRm != 6) && (CRm != 10))
+				return ARMul_CANT;
+			break;
+		case 0:
+			if ((CRm < 5) || (CRm > 7))
+				return ARMul_CANT;
+			break;
+		}
+		break;
+
+	case 8:
+		/* Permissable combinations:
+		   Opcode_2  CRm
+		   0       5
+		   0       6
+		   0       7
+		   1       5
+		   1       6  */
+		if (opcode_2 > 1)
+			return ARMul_CANT;
+		if ((CRm < 5) || (CRm > 7))
+			return ARMul_CANT;
+		if (opcode_2 == 1 && CRm == 7)
+			return ARMul_CANT;
+		break;
+	case 9:
+		/* Opcode_2 must be zero or one.  CRm must be 1 or 2.  */
+		if (((CRm != 0) && (CRm != 1))
+		    || ((opcode_2 != 1) && (opcode_2 != 2)))
+			return ARMul_CANT;
+		break;
+	case 10:
+		/* Opcode_2 must be zero or one.  CRm must be 4 or 8.  */
+		if (((CRm != 0) && (CRm != 1))
+		    || ((opcode_2 != 4) && (opcode_2 != 8)))
+			return ARMul_CANT;
+		break;
+	case 11:
+		/* Access not allowed.  */
+		return ARMul_CANT;
+	case 12:
+		/* Access not allowed.  */
+		return ARMul_CANT;
+	case 13:
+		/* Opcode_2 must be zero.  CRm must be 0.  */
+		if ((CRm != 0) || (opcode_2 != 0))
+			return ARMul_CANT;
+		break;
+	case 14:
+		/* Opcode_2 must be 0.  CRm must be 0, 3, 4, 8 or 9.  */
+		if (opcode_2 != 0)
+			return ARMul_CANT;
+
+		if ((CRm != 0) && (CRm != 3) && (CRm != 4) && (CRm != 8)
+		    && (CRm != 9))
+			return ARMul_CANT;
+		break;
+	case 15:
+		/* Opcode_2 must be zero.  CRm must be 1.  */
+		if ((CRm != 1) || (opcode_2 != 0))
+			return ARMul_CANT;
+		break;
+	default:
+		/* Should never happen.  */
+		return ARMul_CANT;
+	}
+
+	return ARMul_DONE;
+}
+
+/* Coprocessor 13:  Interrupt Controller and Bus Controller.  */
+
+/* There are two sets of registers for copro 13.
+   One set (of three registers) is available when CRm is 0
+   and the other set (of six registers) when CRm is 1.  */
+
+static ARMword XScale_cp13_CR0_Regs[16];
+static ARMword XScale_cp13_CR1_Regs[16];
+
+static unsigned
+XScale_cp13_init (ARMul_State * state)
+{
+	int i;
+
+	for (i = 16; i--;) {
+		XScale_cp13_CR0_Regs[i] = 0;
+		XScale_cp13_CR1_Regs[i] = 0;
+	}
+
+	return No_exp;
+}
+
+/* Check an access to a register.  */
+
+static unsigned
+check_cp13_access (ARMul_State * state,
+		   unsigned reg,
+		   unsigned CRm, unsigned opcode_1, unsigned opcode_2)
+{
+	/* Do not allow access to these registers in USER mode.  */
+	//chy 2006-02-16 , should not consider system mode, don't conside 26bit mode
+	if (state->Mode == USER26MODE || state->Mode == USER32MODE )
+		return ARMul_CANT;
+
+	/* The opcodes should be zero.  */
+	if ((opcode_1 != 0) || (opcode_2 != 0))
+		return ARMul_CANT;
+
+	/* Do not allow access to these register if bit
+	   13 of coprocessor 15's register 15 is zero.  */
+	if (!CP_ACCESS_ALLOWED (state, 13))
+		return ARMul_CANT;
+
+	/* Registers 0, 4 and 8 are defined when CRm == 0.
+	   Registers 0, 1, 4, 5, 6, 7, 8 are defined when CRm == 1.
+	   For all other CRm values undefined behaviour results.  */
+	if (CRm == 0) {
+		if (reg == 0 || reg == 4 || reg == 8)
+			return ARMul_DONE;
+	}
+	else if (CRm == 1) {
+		if (reg == 0 || reg == 1 || (reg >= 4 && reg <= 8))
+			return ARMul_DONE;
+	}
+
+	return ARMul_CANT;
+}
+
+/* Coprocessor 14:  Performance Monitoring,  Clock and Power management,
+   Software Debug.  */
+
+static ARMword XScale_cp14_Regs[16];
+
+static unsigned
+XScale_cp14_init (ARMul_State * state)
+{
+	int i;
+
+	for (i = 16; i--;)
+		XScale_cp14_Regs[i] = 0;
+
+	return No_exp;
+}
+
+/* Check an access to a register.  */
+
+static unsigned
+check_cp14_access (ARMul_State * state,
+		   unsigned reg,
+		   unsigned CRm, unsigned opcode1, unsigned opcode2)
+{
+	/* Not allowed to access these register in USER mode.  */
+	//chy 2006-02-16 , should not consider system mode, don't conside 26bit mode
+	if (state->Mode == USER26MODE || state->Mode == USER32MODE )
+		return ARMul_CANT;
+
+	/* CRm should be zero.  */
+	if (CRm != 0)
+		return ARMul_CANT;
+
+	/* OPcodes should be zero.  */
+	if (opcode1 != 0 || opcode2 != 0)
+		return ARMul_CANT;
+
+	/* Accessing registers 4 or 5 has unpredicatable results.  */
+	if (reg >= 4 && reg <= 5)
+		return ARMul_CANT;
+
+	return ARMul_DONE;
+}
+
+/* Here's ARMulator's MMU definition.  A few things to note:
+   1) It has eight registers, but only two are defined.
+   2) You can only access its registers with MCR and MRC.
+   3) MMU Register 0 (ID) returns 0x41440110
+   4) Register 1 only has 4 bits defined.  Bits 0 to 3 are unused, bit 4
+      controls 32/26 bit program space, bit 5 controls 32/26 bit data space,
+      bit 6 controls late abort timimg and bit 7 controls big/little endian.  */
+
+static ARMword MMUReg[8];
+
+static unsigned
+MMUInit (ARMul_State * state)
+{
+/* 2004-05-09 chy
+-------------------------------------------------------------
+read ARM Architecture Reference Manual
+2.6.5 Data Abort
+There are three Abort Model in ARM arch.
+
+Early Abort Model: used in some ARMv3 and earlier implementations. In this
+model, base register wirteback occurred for LDC,LDM,STC,STM instructions, and
+the base register was unchanged for all other instructions. (oldest)
+
+Base Restored Abort Model: If a Data Abort occurs in an instruction which
+specifies base register writeback, the value in the base register is
+unchanged. (strongarm, xscale)
+
+Base Updated Abort Model: If a Data Abort occurs in an instruction which
+specifies base register writeback, the base register writeback still occurs.
+(arm720T)
+
+read PART B
+chap2 The System Control Coprocessor  CP15
+2.4 Register1:control register
+L(bit 6): in some ARMv3 and earlier implementations, the abort model of the
+processor could be configured:
+0=early Abort Model Selected(now obsolete)
+1=Late Abort Model selceted(same as Base Updated Abort Model)
+
+on later processors, this bit reads as 1 and ignores writes.
+-------------------------------------------------------------
+So, if lateabtSig=1, then it means Late Abort Model(Base Updated Abort Model)
+    if lateabtSig=0, then it means Base Restored Abort Model
+because the ARMs which skyeye simulates are all belonged to  ARMv4,
+so I think MMUReg[1]'s bit 6 should always be 1
+
+*/
+
+	MMUReg[1] = state->prog32Sig << 4 |
+		state->data32Sig << 5 | 1 << 6 | state->bigendSig << 7;
+	//state->data32Sig << 5 | state->lateabtSig << 6 | state->bigendSig << 7;
+
+
+	NOTICE_LOG(ARM11, "ARMul_ConsolePrint: MMU present");
+
+	return TRUE;
+}
+
+static unsigned
+MMUMRC (ARMul_State * state, unsigned type,
+	ARMword instr, ARMword * value)
+{
+	mmu_mrc (state, instr, value);
+	return (ARMul_DONE);
+}
+
+static unsigned
+MMUMCR (ARMul_State * state, unsigned type, ARMword instr, ARMword value)
+{
+	mmu_mcr (state, instr, value);
+	return (ARMul_DONE);
+}
+
+/* What follows is the Validation Suite Coprocessor.  It uses two
+   co-processor numbers (4 and 5) and has the follwing functionality.
+   Sixteen registers.  Both co-processor nuimbers can be used in an MCR
+   and MRC to access these registers.  CP 4 can LDC and STC to and from
+   the registers.  CP 4 and CP 5 CDP 0 will busy wait for the number of
+   cycles specified by a CP register.  CP 5 CDP 1 issues a FIQ after a
+   number of cycles (specified in a CP register), CDP 2 issues an IRQW
+   in the same way, CDP 3 and 4 turn of the FIQ and IRQ source, and CDP 5
+   stores a 32 bit time value in a CP register (actually it's the total
+   number of N, S, I, C and F cyles).  */
+
+static ARMword ValReg[16];
+
+static unsigned
+ValLDC (ARMul_State * state,
+	unsigned type, ARMword instr, ARMword data)
+{
+	static unsigned words;
+
+	if (type != ARMul_DATA)
+		words = 0;
+	else {
+		ValReg[BITS (12, 15)] = data;
+
+		if (BIT (22))
+			/* It's a long access, get two words.  */
+			if (words++ != 4)
+				return ARMul_INC;
+	}
+
+	return ARMul_DONE;
+}
+
+static unsigned
+ValSTC (ARMul_State * state,
+	unsigned type, ARMword instr, ARMword * data)
+{
+	static unsigned words;
+
+	if (type != ARMul_DATA)
+		words = 0;
+	else {
+		*data = ValReg[BITS (12, 15)];
+
+		if (BIT (22))
+			/* It's a long access, get two words.  */
+			if (words++ != 4)
+				return ARMul_INC;
+	}
+
+	return ARMul_DONE;
+}
+
+static unsigned
+ValMRC (ARMul_State * state,
+	unsigned type, ARMword instr, ARMword * value)
+{
+	*value = ValReg[BITS (16, 19)];
+
+	return ARMul_DONE;
+}
+
+static unsigned
+ValMCR (ARMul_State * state,
+	unsigned type, ARMword instr, ARMword value)
+{
+	ValReg[BITS (16, 19)] = value;
+
+	return ARMul_DONE;
+}
+
+static unsigned
+ValCDP (ARMul_State * state, unsigned type, ARMword instr)
+{
+	static unsigned int finish = 0;
+
+	if (BITS (20, 23) != 0)
+		return ARMul_CANT;
+
+	if (type == ARMul_FIRST) {
+		ARMword howlong;
+
+		howlong = ValReg[BITS (0, 3)];
+
+		/* First cycle of a busy wait.  */
+		finish = ARMul_Time (state) + howlong;
+
+		return howlong == 0 ? ARMul_DONE : ARMul_BUSY;
+	}
+	else if (type == ARMul_BUSY) {
+		if (ARMul_Time (state) >= finish)
+			return ARMul_DONE;
+		else
+			return ARMul_BUSY;
+	}
+
+	return ARMul_CANT;
+}
+
+static unsigned
+DoAFIQ (ARMul_State * state)
+{
+	state->NfiqSig = LOW;
+	return 0;
+}
+
+static unsigned
+DoAIRQ (ARMul_State * state)
+{
+	state->NirqSig = LOW;
+	return 0;
+}
+
+static unsigned
+IntCDP (ARMul_State * state, unsigned type, ARMword instr)
+{
+	static unsigned int finish;
+	ARMword howlong;
+
+	howlong = ValReg[BITS (0, 3)];
+
+	switch ((int) BITS (20, 23)) {
+	case 0:
+		if (type == ARMul_FIRST) {
+			/* First cycle of a busy wait.  */
+			finish = ARMul_Time (state) + howlong;
+
+			return howlong == 0 ? ARMul_DONE : ARMul_BUSY;
+		}
+		else if (type == ARMul_BUSY) {
+			if (ARMul_Time (state) >= finish)
+				return ARMul_DONE;
+			else
+				return ARMul_BUSY;
+		}
+		return ARMul_DONE;
+
+	case 1:
+		if (howlong == 0)
+			ARMul_Abort (state, ARMul_FIQV);
+		else
+			ARMul_ScheduleEvent (state, howlong, DoAFIQ);
+		return ARMul_DONE;
+
+	case 2:
+		if (howlong == 0)
+			ARMul_Abort (state, ARMul_IRQV);
+		else
+			ARMul_ScheduleEvent (state, howlong, DoAIRQ);
+		return ARMul_DONE;
+
+	case 3:
+		state->NfiqSig = HIGH;
+		return ARMul_DONE;
+
+	case 4:
+		state->NirqSig = HIGH;
+		return ARMul_DONE;
+
+	case 5:
+		ValReg[BITS (0, 3)] = ARMul_Time (state);
+		return ARMul_DONE;
+	}
+
+	return ARMul_CANT;
+}
+
+/* Install co-processor instruction handlers in this routine.  */
+
+unsigned
+ARMul_CoProInit (ARMul_State * state)
+{
+	unsigned int i;
+
+	/* Initialise tham all first.  */
+	for (i = 0; i < 16; i++)
+		ARMul_CoProDetach (state, i);
+
+	/* Install CoPro Instruction handlers here.
+	   The format is:
+	   ARMul_CoProAttach (state, CP Number, Init routine, Exit routine
+	   LDC routine, STC routine, MRC routine, MCR routine,
+	   CDP routine, Read Reg routine, Write Reg routine).  */
+	if (state->is_ep9312) {
+		ARMul_CoProAttach (state, 4, NULL, NULL, DSPLDC4, DSPSTC4,
+				   DSPMRC4, DSPMCR4, NULL, NULL, DSPCDP4, NULL, NULL);
+		ARMul_CoProAttach (state, 5, NULL, NULL, DSPLDC5, DSPSTC5,
+				   DSPMRC5, DSPMCR5, NULL, NULL, DSPCDP5, NULL, NULL);
+		ARMul_CoProAttach (state, 6, NULL, NULL, NULL, NULL,
+				   DSPMRC6, DSPMCR6, NULL, NULL, DSPCDP6, NULL, NULL);
+	}
+	else {
+		ARMul_CoProAttach (state, 4, NULL, NULL, ValLDC, ValSTC,
+				   ValMRC, ValMCR, NULL, NULL, ValCDP, NULL, NULL);
+
+		ARMul_CoProAttach (state, 5, NULL, NULL, NULL, NULL,
+				   ValMRC, ValMCR, NULL, NULL, IntCDP, NULL, NULL);
+	}
+
+	if (state->is_XScale) {
+		//chy 2005-09-19, for PXA27x's CP6
+		if (state->is_pxa27x) {
+			ARMul_CoProAttach (state, 6, NULL, NULL,
+					   NULL, NULL, xscale_cp6_mrc,
+					   NULL, NULL, NULL, NULL, NULL, NULL);
+		}
+		//chy 2005-09-19 end------------- 
+		ARMul_CoProAttach (state, 13, xscale_cp13_init,
+				   xscale_cp13_exit, xscale_cp13_ldc,
+				   xscale_cp13_stc, xscale_cp13_mrc,
+				   xscale_cp13_mcr, NULL, NULL, xscale_cp13_cdp,
+				   xscale_cp13_read_reg,
+				   xscale_cp13_write_reg);
+
+		ARMul_CoProAttach (state, 14, xscale_cp14_init,
+				   xscale_cp14_exit, xscale_cp14_ldc,
+				   xscale_cp14_stc, xscale_cp14_mrc,
+				   xscale_cp14_mcr, NULL, NULL, xscale_cp14_cdp,
+				   xscale_cp14_read_reg,
+				   xscale_cp14_write_reg);
+		//chy: 2003-08-24.
+		ARMul_CoProAttach (state, 15, xscale_cp15_init,
+				   xscale_cp15_exit, xscale_cp15_ldc,
+				   xscale_cp15_stc, xscale_cp15_mrc,
+				   xscale_cp15_mcr, NULL, NULL, xscale_cp15_cdp,
+				   xscale_cp15_read_reg,
+				   xscale_cp15_write_reg);
+	}
+	else if (state->is_v6) {
+		ARMul_CoProAttach (state, 10, VFPInit, NULL, VFPLDC, VFPSTC,
+				   VFPMRC, VFPMCR, VFPMRRC, VFPMCRR, VFPCDP, NULL, NULL);
+		ARMul_CoProAttach (state, 11, VFPInit, NULL, VFPLDC, VFPSTC,
+				   VFPMRC, VFPMCR, VFPMRRC, VFPMCRR, VFPCDP, NULL, NULL);
+		
+		ARMul_CoProAttach (state, 15, MMUInit, NULL, NULL, NULL,
+				   MMUMRC, MMUMCR, NULL, NULL, NULL, NULL, NULL);
+	}
+	else {			//all except xscale
+		ARMul_CoProAttach (state, 15, MMUInit, NULL, NULL, NULL,
+				   //                  MMUMRC, MMUMCR, NULL, MMURead, MMUWrite);
+				   MMUMRC, MMUMCR, NULL, NULL, NULL, NULL, NULL);
+	}
+//chy 2003-09-03 do it in future!!!!????
+#if 0
+	if (state->is_iWMMXt) {
+		ARMul_CoProAttach (state, 0, NULL, NULL, IwmmxtLDC, IwmmxtSTC,
+				   NULL, NULL, IwmmxtCDP, NULL, NULL);
+
+		ARMul_CoProAttach (state, 1, NULL, NULL, NULL, NULL,
+				   IwmmxtMRC, IwmmxtMCR, IwmmxtCDP, NULL,
+				   NULL);
+	}
+#endif
+	//-----------------------------------------------------------------------------
+	//chy 2004-05-25, found the user/system code visit CP 1,2, so I add below code.
+	ARMul_CoProAttach (state, 1, NULL, NULL, NULL, NULL,
+			   ValMRC, ValMCR, NULL, NULL, NULL, NULL, NULL);
+	ARMul_CoProAttach (state, 2, NULL, NULL, ValLDC, ValSTC,
+			   NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+	//------------------------------------------------------------------------------
+	/* No handlers below here.  */
+
+	/* Call all the initialisation routines.  */
+	for (i = 0; i < 16; i++)
+		if (state->CPInit[i])
+			(state->CPInit[i]) (state);
+
+	return TRUE;
+}
+
+/* Install co-processor finalisation routines in this routine.  */
+
+void
+ARMul_CoProExit (ARMul_State * state)
+{
+	register unsigned i;
+
+	for (i = 0; i < 16; i++)
+		if (state->CPExit[i])
+			(state->CPExit[i]) (state);
+
+	for (i = 0; i < 16; i++)	/* Detach all handlers.  */
+		ARMul_CoProDetach (state, i);
+}
+
+/* Routines to hook Co-processors into ARMulator.  */
+
+void
+ARMul_CoProAttach (ARMul_State * state,
+		   unsigned number,
+		   ARMul_CPInits * init,
+		   ARMul_CPExits * exit,
+		   ARMul_LDCs * ldc,
+		   ARMul_STCs * stc,
+		   ARMul_MRCs * mrc,
+		   ARMul_MCRs * mcr,
+		   ARMul_MRRCs * mrrc,
+		   ARMul_MCRRs * mcrr,
+		   ARMul_CDPs * cdp,
+		   ARMul_CPReads * read, ARMul_CPWrites * write)
+{
+	if (init != NULL)
+		state->CPInit[number] = init;
+	if (exit != NULL)
+		state->CPExit[number] = exit;
+	if (ldc != NULL)
+		state->LDC[number] = ldc;
+	if (stc != NULL)
+		state->STC[number] = stc;
+	if (mrc != NULL)
+		state->MRC[number] = mrc;
+	if (mcr != NULL)
+		state->MCR[number] = mcr;
+	if (mrrc != NULL)
+		state->MRRC[number] = mrrc;
+	if (mcrr != NULL)
+		state->MCRR[number] = mcrr;
+	if (cdp != NULL)
+		state->CDP[number] = cdp;
+	if (read != NULL)
+		state->CPRead[number] = read;
+	if (write != NULL)
+		state->CPWrite[number] = write;
+}
+
+void
+ARMul_CoProDetach (ARMul_State * state, unsigned number)
+{
+	ARMul_CoProAttach (state, number, NULL, NULL,
+			   NoCoPro4R, NoCoPro4W, NoCoPro4W, NoCoPro4R,
+			   NoCoPro5W, NoCoPro5R, NoCoPro3R, NULL, NULL);
+
+	state->CPInit[number] = NULL;
+	state->CPExit[number] = NULL;
+	state->CPRead[number] = NULL;
+	state->CPWrite[number] = NULL;
+}
+
+//chy 2003-09-03:below funs just replace the old ones
+
+/* Set the XScale FSR and FAR registers.  */
+
+void
+XScale_set_fsr_far (ARMul_State * state, ARMword fsr, ARMword _far)
+{
+	//if (!state->is_XScale || (read_cp14_reg (10) & (1UL << 31)) == 0)
+	if (!state->is_XScale)
+		return;
+	//assume opcode2=0 crm =0
+	xscale_cp15_write_reg (state, 5, fsr);
+	xscale_cp15_write_reg (state, 6, _far);
+}
+
+//chy 2003-09-03 seems 0 is CANT, 1 is DONE ????
+int
+XScale_debug_moe (ARMul_State * state, int moe)
+{
+	//chy 2003-09-03 , W/R CP14 reg, now it's no use ????
+	printf ("SKYEYE: XScale_debug_moe called !!!!\n");
+	return 1;
+}
diff --git a/src/core/arm/interpreter/armemu.cpp b/src/core/arm/interpreter/armemu.cpp
index a35c5c8dc..87141653f 100644
--- a/src/core/arm/interpreter/armemu.cpp
+++ b/src/core/arm/interpreter/armemu.cpp
@@ -23,17 +23,6 @@
 #include "armemu.h"
 #include "armos.h"
 
-void
-XScale_set_fsr_far(ARMul_State * state, ARMword fsr, ARMword _far)
-{
-    _dbg_assert_msg_(ARM11, false, "ImplementMe: XScale_set_fsr_far!");
-    //if (!state->is_XScale || (read_cp14_reg(10) & (1UL << 31)) == 0)
-    //    return;
-    //
-    //write_cp15_reg(state, 5, 0, 0, fsr);
-    //write_cp15_reg(state, 6, 0, 0, _far);
-}
-
 #define ARMul_Debug(x,y,z) 0 // Disabling this /bunnei
 
 //#include "skyeye_callback.h"
@@ -5536,14 +5525,15 @@ Handle_Load_Double (ARMul_State * state, ARMword instr)
         addr = base;
 
     /* The address must be aligned on a 8 byte boundary.  */
-    if (addr & 0x7) {
-#ifdef ABORTS
-        ARMul_DATAABORT (addr);
-#else
-        ARMul_UndefInstr (state, instr);
-#endif
-        return;
-    }
+    // FIX(Normatt): Disable strict alignment on LDRD/STRD
+//    if (addr & 0x7) {
+//#ifdef ABORTS
+//        ARMul_DATAABORT (addr);
+//#else
+//        ARMul_UndefInstr (state, instr);
+//#endif
+//        return;
+//    }
 
     /* For pre indexed or post indexed addressing modes,
        check that the destination registers do not overlap
@@ -5640,14 +5630,15 @@ Handle_Store_Double (ARMul_State * state, ARMword instr)
         addr = base;
 
     /* The address must be aligned on a 8 byte boundary.  */
-    if (addr & 0x7) {
-#ifdef ABORTS
-        ARMul_DATAABORT (addr);
-#else
-        ARMul_UndefInstr (state, instr);
-#endif
-        return;
-    }
+    // FIX(Normatt): Disable strict alignment on LDRD/STRD
+//    if (addr & 0x7) {
+//#ifdef ABORTS
+//        ARMul_DATAABORT (addr);
+//#else
+//        ARMul_UndefInstr (state, instr);
+//#endif
+//        return;
+//    }
 
     /* For pre indexed or post indexed addressing modes,
        check that the destination registers do not overlap
@@ -6405,6 +6396,8 @@ handle_v6_insn (ARMul_State * state, ARMword instr)
         if (state->Aborted) {
             TAKEABORT;
         }
+        // FIX(Normmatt): Handle RD in STREX/STREXB
+        state->Reg[DESTReg] = 0; //Always succeed
 
         return 1;
      }
@@ -6432,7 +6425,8 @@ handle_v6_insn (ARMul_State * state, ARMword instr)
         if (state->Aborted) {
             TAKEABORT;
         }
-
+        // FIX(Normmatt): Handle RD in STREX/STREXB
+        state->Reg[DESTReg] = 0; //Always succeed
         //printf("In %s, strexb not implemented\n", __FUNCTION__);
         UNDEF_LSRBPC;
         /* WRITESDEST (dest); */
diff --git a/src/core/arm/interpreter/armemu.h b/src/core/arm/interpreter/armemu.h
index 7c118948a..7ccb07e8d 100644
--- a/src/core/arm/interpreter/armemu.h
+++ b/src/core/arm/interpreter/armemu.h
@@ -17,9 +17,9 @@
 #ifndef __ARMEMU_H__
 #define __ARMEMU_H__
 
-#include "common/common.h"
-#include "armdefs.h"
-//#include "skyeye.h"
+
+#include "core/arm/interpreter/skyeye_defs.h"
+#include "core/arm/interpreter/armdefs.h"
 
 extern ARMword isize;
 
@@ -73,9 +73,7 @@ extern ARMword isize;
 #define ASSIGNT(res) state->TFlag = res
 #define INSN_SIZE (TFLAG ? 2 : 4)
 #else
-#define TBIT (1L << 5)
 #define INSN_SIZE 4
-#define TFLAG 0
 #endif
 
 /*add armv6 CPSR  feature*/
@@ -166,6 +164,7 @@ extern ARMword isize;
 #define PCWRAP(pc) ((pc) & R15PCBITS)
 #endif
 
+#define PC (state->Reg[15] & PCMASK)
 #define R15CCINTMODE (state->Reg[15] & (CCBITS | R15INTBITS | R15MODEBITS))
 #define R15INT (state->Reg[15] & R15INTBITS)
 #define R15INTPC (state->Reg[15] & (R15INTBITS | R15PCBITS))
@@ -180,11 +179,11 @@ extern ARMword isize;
 #define ER15INT (IFFLAGS << 26)
 #define EMODE (state->Mode)
 
-//#ifdef MODET
-//#define CPSR (ECC | EINT | EMODE | (TFLAG << 5))
-//#else
-//#define CPSR (ECC | EINT | EMODE)
-//#endif
+#ifdef MODET
+#define CPSR (ECC | EINT | EMODE | (TFLAG << 5))
+#else
+#define CPSR (ECC | EINT | EMODE)
+#endif
 
 #ifdef MODE32
 #define PATCHR15
@@ -240,12 +239,12 @@ extern ARMword isize;
     }									\
   while (0)
 
-//#ifndef MODE32
+#ifndef MODE32
 #define VECTORS 0x20
 #define LEGALADDR 0x03ffffff
 #define VECTORACCESS(address) (address < VECTORS && ARMul_MODE26BIT && state->prog32Sig)
 #define ADDREXCEPT(address)   (address > LEGALADDR && !state->data32Sig)
-//#endif
+#endif
 
 #define INTERNALABORT(address)			\
   do						\
@@ -420,12 +419,10 @@ extern ARMword isize;
      || (! (STATE)->is_XScale)				\
      || (read_cp15_reg (15, 0, 1) & (1 << (CP))))
 */
-//#define CP_ACCESS_ALLOWED(STATE, CP)			\
-//	(((CP) >= 14)					\
-//	|| (!(STATE)->is_XScale)				\
-//	|| (xscale_cp15_cp_access_allowed(STATE, 15, CP)))
-
-#define CP_ACCESS_ALLOWED(STATE, CP) false // Disabled coprocessor shit /bunnei
+#define CP_ACCESS_ALLOWED(STATE, CP)			\
+    (   ((CP) >= 14)					\
+     || (! (STATE)->is_XScale)				\
+     || (xscale_cp15_cp_access_allowed(STATE,15,CP)))
 
 /* Macro to rotate n right by b bits.  */
 #define ROTATER(n, b) (((n) >> (b)) | ((n) << (32 - (b))))
@@ -517,7 +514,7 @@ tdstate;
  * out-of-updated with the newer ISA.
  * -- Michael.Kang
  ********************************************************************************/
-#define UNDEF_WARNING ERROR_LOG(ARM11, "undefined or unpredicted behavior for arm instruction.\n");
+#define UNDEF_WARNING WARN_LOG(ARM11, "undefined or unpredicted behavior for arm instruction.\n");
 
 /* Macros to scrutinize instructions.  */
 #define UNDEF_Test UNDEF_WARNING
diff --git a/src/core/arm/interpreter/arminit.cpp b/src/core/arm/interpreter/arminit.cpp
index a8aeecdea..2c771cdda 100644
--- a/src/core/arm/interpreter/arminit.cpp
+++ b/src/core/arm/interpreter/arminit.cpp
@@ -23,8 +23,8 @@
 
 #include <math.h>
 
-#include "armdefs.h"
-#include "armemu.h"
+#include "core/arm/interpreter/armdefs.h"
+#include "core/arm/interpreter/armemu.h"
 
 /***************************************************************************\
 *                 Definitions for the emulator architecture                 *
@@ -271,7 +271,7 @@ below line sould be in skyeye_mach_XXX.c 's XXX_mach_init function
 
 	/* Only initialse the coprocessor support once we
 	   know what kind of chip we are dealing with.  */
-	//ARMul_CoProInit (state); Commented out /bunnei
+	ARMul_CoProInit (state);
 
 }
 
@@ -318,7 +318,7 @@ ARMul_Reset (ARMul_State * state)
 	state->NumFcycles = 0;
 
 	//fprintf(stderr,"armul_reset 3: state->  Cpsr 0x%x, Mode %d\n",state->Cpsr,state->Mode);  
-	//mmu_reset (state); Commented out /bunnei
+	mmu_reset (state);
 	//fprintf(stderr,"armul_reset 4: state->  Cpsr 0x%x, Mode %d\n",state->Cpsr,state->Mode);  
 
 	//mem_reset (state); /* move to memory/ram.c */
diff --git a/src/core/arm/interpreter/armmmu.h b/src/core/arm/interpreter/armmmu.h
index 8b24e6151..818108c9c 100644
--- a/src/core/arm/interpreter/armmmu.h
+++ b/src/core/arm/interpreter/armmmu.h
@@ -172,18 +172,18 @@ typedef struct mmu_ops_s
 } mmu_ops_t;
 
 
-#include "core/arm/mmu/tlb.h"
-#include "core/arm/mmu/rb.h"
-#include "core/arm/mmu/wb.h"
-#include "core/arm/mmu/cache.h"
+#include "core/arm/interpreter/mmu/tlb.h"
+#include "core/arm/interpreter/mmu/rb.h"
+#include "core/arm/interpreter/mmu/wb.h"
+#include "core/arm/interpreter/mmu/cache.h"
 
 /*special process mmu.h*/
-//#include "core/arm/mmu/sa_mmu.h"
-//#include "core/arm/mmu/arm7100_mmu.h"
-//#include "core/arm/mmu/arm920t_mmu.h"
-//#include "core/arm/mmu/arm926ejs_mmu.h"
-#include "core/arm/mmu/arm1176jzf_s_mmu.h"
-//#include "core/arm/mmu/cortex_a9_mmu.h"
+#include "core/arm/interpreter/mmu/sa_mmu.h"
+//#include "core/arm/interpreter/mmu/arm7100_mmu.h"
+//#include "core/arm/interpreter/mmu/arm920t_mmu.h"
+//#include "core/arm/interpreter/mmu/arm926ejs_mmu.h"
+#include "core/arm/interpreter/mmu/arm1176jzf_s_mmu.h"
+//#include "core/arm/interpreter/mmu/cortex_a9_mmu.h"
 
 typedef struct mmu_state_t
 {
@@ -213,13 +213,13 @@ typedef struct mmu_state_t
 	ARMword copro_access;	// 15
 
 	mmu_ops_t ops;
-	//union
-	//{
-		//sa_mmu_t sa_mmu;
+	union
+	{
+		sa_mmu_t sa_mmu;
 		//arm7100_mmu_t arm7100_mmu;
 		//arm920t_mmu_t arm920t_mmu;
 		//arm926ejs_mmu_t arm926ejs_mmu;
-	//} u;
+	} u;
 } mmu_state_t;
 
 int mmu_init (ARMul_State * state);
diff --git a/src/core/arm/interpreter/armsupp.cpp b/src/core/arm/interpreter/armsupp.cpp
index b2bbedc18..7816c4c42 100644
--- a/src/core/arm/interpreter/armsupp.cpp
+++ b/src/core/arm/interpreter/armsupp.cpp
@@ -15,12 +15,10 @@
     along with this program; if not, write to the Free Software
     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
 
-#include "armdefs.h"
-#include "armemu.h"
-
-//#include "ansidecl.h"
-#include "skyeye_defs.h"
-#include "core/hle/mrc.h"
+#include "core/arm/interpreter/armdefs.h"
+#include "core/arm/interpreter/armemu.h"
+#include "core/arm/interpreter/skyeye_defs.h"
+#include "core/hle/coprocessor.h"
 #include "core/arm/disassembler/arm_disasm.h"
 
 unsigned xscale_cp15_cp_access_allowed (ARMul_State * state, unsigned reg,
@@ -127,8 +125,7 @@ ARMul_GetCPSR (ARMul_State * state)
 {
 	//chy 2003-08-20: below is from gdb20030716, maybe isn't suitable for system simulator
 	//return (CPSR | state->Cpsr); for gdb20030716
-    // NOTE(bunnei): Changed this from [now] commented out macro "CPSR"
-    return ((ECC | EINT | EMODE | (TFLAG << 5)));	//had be tested in old skyeye with gdb5.0-5.3
+	return (CPSR);		//had be tested in old skyeye with gdb5.0-5.3
 }
 
 /* This routine sets the value of the CPSR.  */
@@ -145,7 +142,7 @@ ARMul_SetCPSR (ARMul_State * state, ARMword value)
 
 void
 ARMul_FixCPSR (ARMul_State * state, ARMword instr, ARMword rhs)
-{  
+{
 	state->Cpsr = ARMul_GetCPSR (state);
 	//chy 2006-02-16 , should not consider system mode, don't conside 26bit mode
 	if (state->Mode != USER26MODE && state->Mode != USER32MODE ) {
@@ -500,8 +497,8 @@ ARMul_LDC (ARMul_State * state, ARMword instr, ARMword address)
 		return;
 	}
 
-	if (ADDREXCEPT (address))
-		INTERNALABORT (address);
+	//if (ADDREXCEPT (address))
+	//	INTERNALABORT (address);
 
 	cpab = (state->LDC[CPNum]) (state, ARMul_FIRST, instr, 0);
 	while (cpab == ARMul_BUSY) {
@@ -594,8 +591,8 @@ ARMul_STC (ARMul_State * state, ARMword instr, ARMword address)
 		return;
 	}
 
-	if (ADDREXCEPT (address) || VECTORACCESS (address))
-		INTERNALABORT (address);
+	//if (ADDREXCEPT (address) || VECTORACCESS (address))
+	//	INTERNALABORT (address);
 
 	cpab = (state->STC[CPNum]) (state, ARMul_FIRST, instr, &data);
 	while (cpab == ARMul_BUSY) {
@@ -739,39 +736,43 @@ ARMul_MRC (ARMul_State * state, ARMword instr)
 {
 	unsigned cpab;
 
-	ARMword result = HLE::CallMRC((HLE::ARM11_MRC_OPERATION)BITS(20, 27));
-
-	////printf("SKYEYE ARMul_MRC, CPnum is %x, instr %x\n",CPNum, instr);
-	//if (!CP_ACCESS_ALLOWED (state, CPNum)) {
-	//	//chy 2004-07-19 should fix in the future????!!!!
-	//	//printf("SKYEYE ARMul_MRC,NOT ALLOWed UndefInstr  CPnum is %x, instr %x\n",CPNum, instr);
-	//	ARMul_UndefInstr (state, instr);
-	//	return -1;
-	//}
-
-	//cpab = (state->MRC[CPNum]) (state, ARMul_FIRST, instr, &result);
-	//while (cpab == ARMul_BUSY) {
-	//	ARMul_Icycles (state, 1, 0);
-	//	if (IntPending (state)) {
-	//		cpab = (state->MRC[CPNum]) (state, ARMul_INTERRUPT,
-	//					    instr, 0);
-	//		return (0);
-	//	}
-	//	else
-	//		cpab = (state->MRC[CPNum]) (state, ARMul_BUSY, instr,
-	//					    &result);
-	//}
-	//if (cpab == ARMul_CANT) {
-	//	printf ("SKYEYE ARMul_MRC,CANT UndefInstr  CPnum is %x, instr %x\n", CPNum, instr);
-	//	ARMul_Abort (state, ARMul_UndefinedInstrV);
-	//	/* Parent will destroy the flags otherwise.  */
-	//	result = ECC;
-	//}
-	//else {
-	//	BUSUSEDINCPCN;
-	//	ARMul_Ccycles (state, 1, 0);
-	//	ARMul_Icycles (state, 1, 0);
-	//}
+	ARMword result = HLE::CallMRC(instr);
+
+	if (result != -1) {
+		return result;
+	}
+
+	//printf("SKYEYE ARMul_MRC, CPnum is %x, instr %x\n",CPNum, instr);
+	if (!CP_ACCESS_ALLOWED (state, CPNum)) {
+		//chy 2004-07-19 should fix in the future????!!!!
+		//printf("SKYEYE ARMul_MRC,NOT ALLOWed UndefInstr  CPnum is %x, instr %x\n",CPNum, instr);
+		ARMul_UndefInstr (state, instr);
+		return -1;
+	}
+
+	cpab = (state->MRC[CPNum]) (state, ARMul_FIRST, instr, &result);
+	while (cpab == ARMul_BUSY) {
+		ARMul_Icycles (state, 1, 0);
+		if (IntPending (state)) {
+			cpab = (state->MRC[CPNum]) (state, ARMul_INTERRUPT,
+						    instr, 0);
+			return (0);
+		}
+		else
+			cpab = (state->MRC[CPNum]) (state, ARMul_BUSY, instr,
+						    &result);
+	}
+	if (cpab == ARMul_CANT) {
+		printf ("SKYEYE ARMul_MRC,CANT UndefInstr  CPnum is %x, instr %x\n", CPNum, instr);
+		ARMul_Abort (state, ARMul_UndefinedInstrV);
+		/* Parent will destroy the flags otherwise.  */
+		result = ECC;
+	}
+	else {
+		BUSUSEDINCPCN;
+		ARMul_Ccycles (state, 1, 0);
+		ARMul_Icycles (state, 1, 0);
+	}
 
 	return result;
 }
@@ -906,9 +907,7 @@ ARMul_ScheduleEvent (ARMul_State * state, unsigned int delay,
 		state->Now = ARMul_Time (state);
 	when = (state->Now + delay) % EVENTLISTSIZE;
 	event = (struct EventNode *) malloc (sizeof (struct EventNode));
-
 	_dbg_assert_msg_(ARM11, event, "SKYEYE:ARMul_ScheduleEvent: malloc event error\n");
-
 	event->func = what;
 	event->next = *(state->EventPtr + when);
 	*(state->EventPtr + when) = event;
diff --git a/src/core/arm/mmu/arm1176jzf_s_mmu.cpp b/src/core/arm/interpreter/mmu/arm1176jzf_s_mmu.cpp
index 7e7fbfbfa..a32f076b9 100644
--- a/src/core/arm/mmu/arm1176jzf_s_mmu.cpp
+++ b/src/core/arm/interpreter/mmu/arm1176jzf_s_mmu.cpp
@@ -355,7 +355,7 @@ arm1176jzf_s_mmu_load_instr (ARMul_State *state, ARMword va, ARMword *instr)
 
     static int debug_count = 0;    /* used for debug */
 
-    DEBUG_LOG(ARM11, "va = %x\n", va);
+    //DEBUG_LOG(ARM11, "va = %x\n", va);
 
     va = mmu_pid_va_map (va);
     if (MMU_Enabled) {
@@ -444,7 +444,7 @@ arm1176jzf_s_mmu_read (ARMul_State *state, ARMword va, ARMword *data,
     ARMword perm;        /* physical addr access permissions */
     int ap, sop;
 
-    DEBUG_LOG(ARM11, "va = %x\n", va);
+    //DEBUG_LOG(ARM11, "va = %x\n", va);
 
     va = mmu_pid_va_map (va);
     real_va = va;
@@ -629,7 +629,7 @@ arm1176jzf_s_mmu_write (ARMul_State *state, ARMword va, ARMword data,
     }
 #endif
 
-    DEBUG_LOG(ARM11, "va = %x, val = %x\n", va, data);
+    //DEBUG_LOG(ARM11, "va = %x, val = %x\n", va, data);
     va = mmu_pid_va_map (va);
     real_va = va;
 
diff --git a/src/core/arm/mmu/arm1176jzf_s_mmu.h b/src/core/arm/interpreter/mmu/arm1176jzf_s_mmu.h
index 299c6b46b..299c6b46b 100644
--- a/src/core/arm/mmu/arm1176jzf_s_mmu.h
+++ b/src/core/arm/interpreter/mmu/arm1176jzf_s_mmu.h
diff --git a/src/core/arm/interpreter/mmu/cache.cpp b/src/core/arm/interpreter/mmu/cache.cpp
new file mode 100644
index 000000000..f3c4e0531
--- /dev/null
+++ b/src/core/arm/interpreter/mmu/cache.cpp
@@ -0,0 +1,370 @@
+#include "core/arm/interpreter/armdefs.h"
+
+/* mmu cache init
+ *
+ * @cache_t :cache_t to init
+ * @width	:cache line width in byte
+ * @way		:way of each cache set
+ * @set		:cache set num
+ *
+ * $ -1: error
+ * 	 0: sucess
+ */
+int
+mmu_cache_init (cache_s * cache_t, int width, int way, int set, int w_mode)
+{
+	int i, j;
+	cache_set_t *sets;
+	cache_line_t *lines;
+
+	/*alloc cache set */
+	sets = NULL;
+	lines = NULL;
+	//fprintf(stderr, "mmu_cache_init: mallloc beg size %d,sets 0x%x\n", sizeof(cache_set_t) * set,sets);
+	//exit(-1);
+	sets = (cache_set_t *) malloc (sizeof (cache_set_t) * set);
+	if (sets == NULL) {
+		ERROR_LOG(ARM11, "set malloc size %d\n", sizeof (cache_set_t) * set);
+		goto sets_error;
+	}
+	//fprintf(stderr, "mmu_cache_init: mallloc end sets 0x%x\n", sets);
+	cache_t->sets = sets;
+
+	/*init cache set */
+	for (i = 0; i < set; i++) {
+		/*alloc cache line */
+		lines = (cache_line_t *) malloc (sizeof (cache_line_t) * way);
+		if (lines == NULL) {
+			ERROR_LOG(ARM11, "line malloc size %d\n",
+				 sizeof (cache_line_t) * way);
+			goto lines_error;
+		}
+		/*init cache line */
+		for (j = 0; j < way; j++) {
+			lines[j].tag = 0;	//invalid
+			lines[j].data = (ARMword *) malloc (width);
+			if (lines[j].data == NULL) {
+				ERROR_LOG(ARM11, "data alloc size %d\n", width);
+				goto data_error;
+			}
+		}
+
+		sets[i].lines = lines;
+		sets[i].cycle = 0;
+
+	}
+	cache_t->width = width;
+	cache_t->set = set;
+	cache_t->way = way;
+	cache_t->w_mode = w_mode;
+	return 0;
+
+      data_error:
+	/*free data */
+	while (j-- > 0)
+		free (lines[j].data);
+	/*free data error line */
+	free (lines);
+      lines_error:
+	/*free lines already alloced */
+	while (i-- > 0) {
+		for (j = 0; j < way; j++)
+			free (sets[i].lines[j].data);
+		free (sets[i].lines);
+	}
+	/*free sets */
+	free (sets);
+      sets_error:
+	return -1;
+};
+
+/* free a cache_t's inner data, the ptr self is not freed,
+ * when needed do like below:
+ * 		mmu_cache_exit(cache);
+ * 		free(cache_t);
+ *
+ * @cache_t : the cache_t to free
+ */
+
+void
+mmu_cache_exit (cache_s * cache_t)
+{
+	int i, j;
+	cache_set_t *sets, *set;
+	cache_line_t *lines, *line;
+
+	/*free all set */
+	sets = cache_t->sets;
+	for (set = sets, i = 0; i < cache_t->set; i++, set++) {
+		/*free all line */
+		lines = set->lines;
+		for (line = lines, j = 0; j < cache_t->way; j++, line++)
+			free (line->data);
+		free (lines);
+	}
+	free (sets);
+}
+
+/* mmu cache search
+ *
+ * @state	:ARMul_State
+ * @cache_t	:cache_t to search
+ * @va		:virtual address
+ *
+ * $	NULL:	no cache match
+ * 		cache	:cache matched
+ */
+cache_line_t *
+mmu_cache_search (ARMul_State * state, cache_s * cache_t, ARMword va)
+{
+	int i;
+	int set = va_cache_set (va, cache_t);
+	ARMword tag = va_cache_align (va, cache_t);
+	cache_line_t *cache;
+
+	cache_set_t *cache_set = cache_t->sets + set;
+	for (i = 0, cache = cache_set->lines; i < cache_t->way; i++, cache++) {
+		if ((cache->tag & TAG_VALID_FLAG)
+		    && (tag == va_cache_align (cache->tag, cache_t)))
+			return cache;
+	}
+	return NULL;
+}
+
+/* mmu cache search by set/index
+ *
+ * @state	:ARMul_State
+ * @cache_t	:cache_t to search
+ * @index	:set/index value. 
+ *
+ * $	NULL:	no cache match
+ * 		cache	:cache matched
+ */
+cache_line_t *
+mmu_cache_search_by_index (ARMul_State * state, cache_s * cache_t,
+			   ARMword index)
+{
+	int way = cache_t->way;
+	int set_v = index_cache_set (index, cache_t);
+	int i = 0, index_v = 0;
+	cache_set_t *set;
+
+	while ((way >>= 1) >= 1)
+		i++;
+	index_v = index >> (32 - i);
+	set = cache_t->sets + set_v;
+
+	return set->lines + index_v;
+}
+
+
+/* mmu cache alloc
+ *
+ * @state :ARMul_State
+ * @cache_t	:cache_t to alloc from
+ * @va		:virtual address that require cache alloc, need not cache aligned
+ * @pa		:physical address of va
+ *
+ * $	cache_alloced, always alloc OK
+ */
+cache_line_t *
+mmu_cache_alloc (ARMul_State * state, cache_s * cache_t, ARMword va,
+		 ARMword pa)
+{
+	cache_line_t *cache;
+	cache_set_t *set;
+	int i;
+
+	va = va_cache_align (va, cache_t);
+	pa = va_cache_align (pa, cache_t);
+
+	set = &cache_t->sets[va_cache_set (va, cache_t)];
+
+	/*robin-round */
+	cache = &set->lines[set->cycle++];
+	if (set->cycle == cache_t->way)
+		set->cycle = 0;
+
+	if (cache_t->w_mode == CACHE_WRITE_BACK) {
+		ARMword t;
+
+		/*if cache valid, try to write back */
+		if (cache->tag & TAG_VALID_FLAG) {
+			mmu_cache_write_back (state, cache_t, cache);
+		}
+		/*read in cache_line */
+		t = pa;
+		for (i = 0; i < (cache_t->width >> WORD_SHT);
+		     i++, t += WORD_SIZE) {
+			//cache->data[i] = mem_read_word (state, t);
+			bus_read(32, t, &cache->data[i]);
+		}
+	}
+	/*store tag and pa */
+	cache->tag = va | TAG_VALID_FLAG;
+	cache->pa = pa;
+
+	return cache;
+};
+
+/* mmu_cache_write_back write cache data to memory
+ * @state
+ * @cache_t :cache_t of the cache line
+ * @cache : cache line
+ */
+void
+mmu_cache_write_back (ARMul_State * state, cache_s * cache_t,
+		      cache_line_t * cache)
+{
+	ARMword pa = cache->pa;
+	int nw = cache_t->width >> WORD_SHT;
+	ARMword *data = cache->data;
+	int i;
+	int t0, t1, t2;
+
+	if ((cache->tag & 1) == 0)
+		return;
+
+	switch (cache->
+		tag & ~1 & (TAG_FIRST_HALF_DIRTY | TAG_LAST_HALF_DIRTY)) {
+	case 0:
+		return;
+	case TAG_FIRST_HALF_DIRTY:
+		nw /= 2;
+		break;
+	case TAG_LAST_HALF_DIRTY:
+		nw /= 2;
+		pa += nw << WORD_SHT;
+		data += nw;
+		break;
+	case TAG_FIRST_HALF_DIRTY | TAG_LAST_HALF_DIRTY:
+		break;
+	}
+	for (i = 0; i < nw; i++, data++, pa += WORD_SIZE)
+		//mem_write_word (state, pa, *data);
+		bus_write(32, pa, *data);
+
+	cache->tag &= ~(TAG_FIRST_HALF_DIRTY | TAG_LAST_HALF_DIRTY);
+};
+
+
+/* mmu_cache_clean: clean a cache of va in cache_t
+ *
+ * @state	:ARMul_State
+ * @cache_t	:cache_t to clean
+ * @va		:virtaul address
+ */
+void
+mmu_cache_clean (ARMul_State * state, cache_s * cache_t, ARMword va)
+{
+	cache_line_t *cache;
+
+	cache = mmu_cache_search (state, cache_t, va);
+	if (cache)
+		mmu_cache_write_back (state, cache_t, cache);
+}
+
+/* mmu_cache_clean_by_index: clean a cache by set/index format value
+ *
+ * @state	:ARMul_State
+ * @cache_t	:cache_t to clean
+ * @va		:set/index format value
+ */
+void
+mmu_cache_clean_by_index (ARMul_State * state, cache_s * cache_t,
+			  ARMword index)
+{
+	cache_line_t *cache;
+
+	cache = mmu_cache_search_by_index (state, cache_t, index);
+	if (cache)
+		mmu_cache_write_back (state, cache_t, cache);
+}
+
+/* mmu_cache_invalidate : invalidate a cache of va
+ *
+ * @state	:ARMul_State
+ * @cache_t	:cache_t to invalid
+ * @va		:virt_addr to invalid
+ */
+void
+mmu_cache_invalidate (ARMul_State * state, cache_s * cache_t, ARMword va)
+{
+	cache_line_t *cache;
+
+	cache = mmu_cache_search (state, cache_t, va);
+	if (cache) {
+		mmu_cache_write_back (state, cache_t, cache);
+		cache->tag = 0;
+	}
+}
+
+/* mmu_cache_invalidate_by_index : invalidate a cache by index format
+ *
+ * @state	:ARMul_State
+ * @cache_t	:cache_t to invalid
+ * @index	:set/index data
+ */
+void
+mmu_cache_invalidate_by_index (ARMul_State * state, cache_s * cache_t,
+			       ARMword index)
+{
+	cache_line_t *cache;
+
+	cache = mmu_cache_search_by_index (state, cache_t, index);
+	if (cache) {
+		mmu_cache_write_back (state, cache_t, cache);
+		cache->tag = 0;
+	}
+}
+
+/* mmu_cache_invalidate_all
+ *
+ * @state:
+ * @cache_t
+ * */
+void
+mmu_cache_invalidate_all (ARMul_State * state, cache_s * cache_t)
+{
+	int i, j;
+	cache_set_t *set;
+	cache_line_t *cache;
+
+	set = cache_t->sets;
+	for (i = 0; i < cache_t->set; i++, set++) {
+		cache = set->lines;
+		for (j = 0; j < cache_t->way; j++, cache++) {
+			mmu_cache_write_back (state, cache_t, cache);
+			cache->tag = 0;
+		}
+	}
+};
+
+void
+mmu_cache_soft_flush (ARMul_State * state, cache_s * cache_t, ARMword pa)
+{
+	ARMword set, way;
+	cache_line_t *cache;
+	pa = (pa / cache_t->width);
+	way = pa & (cache_t->way - 1);
+	set = (pa / cache_t->way) & (cache_t->set - 1);
+	cache = &cache_t->sets[set].lines[way];
+
+	mmu_cache_write_back (state, cache_t, cache);
+	cache->tag = 0;
+}
+
+cache_line_t*  mmu_cache_dirty_cache(ARMul_State *state,cache_s *cache){
+	int i;
+	int j;
+	cache_line_t *cache_line = NULL;
+	cache_set_t *cache_set = cache->sets;
+	int sets = cache->set;
+	for (i = 0; i < sets; i++){
+		for(j = 0,cache_line = &cache_set[i].lines[0]; j < cache->way; j++,cache_line++){
+			if((cache_line->tag & TAG_FIRST_HALF_DIRTY) || (cache_line->tag & TAG_LAST_HALF_DIRTY))
+				return cache_line;
+		}
+	}
+	return NULL;
+}
diff --git a/src/core/arm/mmu/cache.h b/src/core/arm/interpreter/mmu/cache.h
index d308d9b87..d308d9b87 100644
--- a/src/core/arm/mmu/cache.h
+++ b/src/core/arm/interpreter/mmu/cache.h
diff --git a/src/core/arm/interpreter/mmu/maverick.cpp b/src/core/arm/interpreter/mmu/maverick.cpp
new file mode 100644
index 000000000..0e98ef22b
--- /dev/null
+++ b/src/core/arm/interpreter/mmu/maverick.cpp
@@ -0,0 +1,1206 @@
+/*  maverick.c -- Cirrus/DSP co-processor interface.
+    Copyright (C) 2003 Free Software Foundation, Inc.
+    Contributed by Aldy Hernandez (aldyh@redhat.com).
+ 
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+ 
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+ 
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <assert.h>
+
+#include "core/arm/interpreter/armdefs.h"
+#include "core/arm/interpreter/armemu.h"
+
+
+/*#define CIRRUS_DEBUG 1	*/
+#if CIRRUS_DEBUG
+#  define printfdbg printf
+#else
+#  define printfdbg printf_nothing
+#endif
+
+#define POS64(i) ( (~(i)) >> 63 )
+#define NEG64(i) ( (i) >> 63 )
+
+/* Define Co-Processor instruction handlers here.  */
+
+/* Here's ARMulator's DSP definition.  A few things to note:
+   1) it has 16 64-bit registers and 4 72-bit accumulators
+   2) you can only access its registers with MCR and MRC.  */
+
+/* We can't define these in here because this file might not be linked
+   unless the target is arm9e-*.  They are defined in wrapper.c.
+   Eventually the simulator should be made to handle any coprocessor
+   at run time.  */
+struct maverick_regs
+{
+	union
+	{
+		int i;
+		float f;
+	} upper;
+
+	union
+	{
+		int i;
+		float f;
+	} lower;
+};
+
+union maverick_acc_regs
+{
+	long double ld;		/* Acc registers are 72-bits.  */
+};
+
+struct maverick_regs DSPregs[16];
+union maverick_acc_regs DSPacc[4];
+ARMword DSPsc;
+
+#define DEST_REG	(BITS (12, 15))
+#define SRC1_REG	(BITS (16, 19))
+#define SRC2_REG	(BITS (0, 3))
+
+static int lsw_int_index, msw_int_index;
+static int lsw_float_index, msw_float_index;
+
+static double mv_getRegDouble (int);
+static long long mv_getReg64int (int);
+static void mv_setRegDouble (int, double val);
+static void mv_setReg64int (int, long long val);
+
+static union
+{
+	double d;
+	long long ll;
+	int ints[2];
+} reg_conv;
+
+static void
+printf_nothing (void *foo, ...)
+{
+}
+
+static void
+cirrus_not_implemented (char *insn)
+{
+	fprintf (stderr, "Cirrus instruction '%s' not implemented.\n", insn);
+	fprintf (stderr, "aborting!\n");
+
+	// skyeye_exit (1);
+}
+
+static unsigned
+DSPInit (ARMul_State * state)
+{
+	NOTICE_LOG(ARM11, "ARMul_ConsolePrint: DSP present");
+	return TRUE;
+}
+
+unsigned
+DSPMRC4 (ARMul_State * state,
+	 unsigned type, ARMword instr, ARMword * value)
+{
+	switch (BITS (5, 7)) {
+	case 0:		/* cfmvrdl */
+		/* Move lower half of a DF stored in a DSP reg into an Arm reg.  */
+		printfdbg ("cfmvrdl\n");
+		printfdbg ("\tlower half=0x%x\n", DSPregs[SRC1_REG].lower.i);
+		printfdbg ("\tentire thing=%g\n", mv_getRegDouble (SRC1_REG));
+
+		*value = (ARMword) DSPregs[SRC1_REG].lower.i;
+		break;
+
+	case 1:		/* cfmvrdh */
+		/* Move upper half of a DF stored in a DSP reg into an Arm reg.  */
+		printfdbg ("cfmvrdh\n");
+		printfdbg ("\tupper half=0x%x\n", DSPregs[SRC1_REG].upper.i);
+		printfdbg ("\tentire thing=%g\n", mv_getRegDouble (SRC1_REG));
+
+		*value = (ARMword) DSPregs[SRC1_REG].upper.i;
+		break;
+
+	case 2:		/* cfmvrs */
+		/* Move SF from upper half of a DSP register to an Arm register.  */
+		*value = (ARMword) DSPregs[SRC1_REG].upper.i;
+		printfdbg ("cfmvrs = mvf%d <-- %f\n",
+			   SRC1_REG, DSPregs[SRC1_REG].upper.f);
+		break;
+
+#ifdef doesnt_work
+	case 4:		/* cfcmps */
+		{
+			float a, b;
+			int n, z, c, v;
+
+			a = DSPregs[SRC1_REG].upper.f;
+			b = DSPregs[SRC2_REG].upper.f;
+
+			printfdbg ("cfcmps\n");
+			printfdbg ("\tcomparing %f and %f\n", a, b);
+
+			z = a == b;	/* zero */
+			n = a != b;	/* negative */
+			v = a > b;	/* overflow */
+			c = 0;	/* carry */
+			*value = (n << 31) | (z << 30) | (c << 29) | (v <<
+								      28);
+			break;
+		}
+
+	case 5:		/* cfcmpd */
+		{
+			double a, b;
+			int n, z, c, v;
+
+			a = mv_getRegDouble (SRC1_REG);
+			b = mv_getRegDouble (SRC2_REG);
+
+			printfdbg ("cfcmpd\n");
+			printfdbg ("\tcomparing %g and %g\n", a, b);
+
+			z = a == b;	/* zero */
+			n = a != b;	/* negative */
+			v = a > b;	/* overflow */
+			c = 0;	/* carry */
+			*value = (n << 31) | (z << 30) | (c << 29) | (v <<
+								      28);
+			break;
+		}
+#else
+	case 4:		/* cfcmps */
+		{
+			float a, b;
+			int n, z, c, v;
+
+			a = DSPregs[SRC1_REG].upper.f;
+			b = DSPregs[SRC2_REG].upper.f;
+
+			printfdbg ("cfcmps\n");
+			printfdbg ("\tcomparing %f and %f\n", a, b);
+
+			z = a == b;	/* zero */
+			n = a < b;	/* negative */
+			c = a > b;	/* carry */
+			v = 0;	/* fixme */
+			printfdbg ("\tz = %d, n = %d\n", z, n);
+			*value = (n << 31) | (z << 30) | (c << 29) | (v <<
+								      28);
+			break;
+		}
+
+	case 5:		/* cfcmpd */
+		{
+			double a, b;
+			int n, z, c, v;
+
+			a = mv_getRegDouble (SRC1_REG);
+			b = mv_getRegDouble (SRC2_REG);
+
+			printfdbg ("cfcmpd\n");
+			printfdbg ("\tcomparing %g and %g\n", a, b);
+
+			z = a == b;	/* zero */
+			n = a < b;	/* negative */
+			c = a > b;	/* carry */
+			v = 0;	/* fixme */
+			*value = (n << 31) | (z << 30) | (c << 29) | (v <<
+								      28);
+			break;
+		}
+#endif
+	default:
+		fprintf (stderr, "unknown opcode in DSPMRC4 0x%x\n", instr);
+		cirrus_not_implemented ("unknown");
+		break;
+	}
+
+	return ARMul_DONE;
+}
+
+unsigned
+DSPMRC5 (ARMul_State * state,
+	 unsigned type, ARMword instr, ARMword * value)
+{
+	switch (BITS (5, 7)) {
+	case 0:		/* cfmvr64l */
+		/* Move lower half of 64bit int from Cirrus to Arm.  */
+		*value = (ARMword) DSPregs[SRC1_REG].lower.i;
+		printfdbg ("cfmvr64l ARM_REG = mvfx%d <-- %d\n",
+			   DEST_REG, (int) *value);
+		break;
+
+	case 1:		/* cfmvr64h */
+		/* Move upper half of 64bit int from Cirrus to Arm.  */
+		*value = (ARMword) DSPregs[SRC1_REG].upper.i;
+		printfdbg ("cfmvr64h <-- %d\n", (int) *value);
+		break;
+
+	case 4:		/* cfcmp32 */
+		{
+			int res;
+			int n, z, c, v;
+			unsigned int a, b;
+
+			printfdbg ("cfcmp32 mvfx%d - mvfx%d\n", SRC1_REG,
+				   SRC2_REG);
+
+			/* FIXME: see comment for cfcmps.  */
+			a = DSPregs[SRC1_REG].lower.i;
+			b = DSPregs[SRC2_REG].lower.i;
+
+			res = DSPregs[SRC1_REG].lower.i -
+				DSPregs[SRC2_REG].lower.i;
+			/* zero */
+			z = res == 0;
+			/* negative */
+			n = res < 0;
+			/* overflow */
+			v = SubOverflow (DSPregs[SRC1_REG].lower.i,
+					 DSPregs[SRC2_REG].lower.i, res);
+			/* carry */
+			c = (NEG (a) && POS (b) ||
+			     (NEG (a) && POS (res)) || (POS (b)
+							&& POS (res)));
+
+			*value = (n << 31) | (z << 30) | (c << 29) | (v <<
+								      28);
+			break;
+		}
+
+	case 5:		/* cfcmp64 */
+		{
+			long long res;
+			int n, z, c, v;
+			unsigned long long a, b;
+
+			printfdbg ("cfcmp64 mvdx%d - mvdx%d\n", SRC1_REG,
+				   SRC2_REG);
+
+			/* fixme: see comment for cfcmps.  */
+
+			a = mv_getReg64int (SRC1_REG);
+			b = mv_getReg64int (SRC2_REG);
+
+			res = mv_getReg64int (SRC1_REG) -
+				mv_getReg64int (SRC2_REG);
+			/* zero */
+			z = res == 0;
+			/* negative */
+			n = res < 0;
+			/* overflow */
+			v = ((NEG64 (a) && POS64 (b) && POS64 (res))
+			     || (POS64 (a) && NEG64 (b) && NEG64 (res)));
+			/* carry */
+			c = (NEG64 (a) && POS64 (b) ||
+			     (NEG64 (a) && POS64 (res)) || (POS64 (b)
+							    && POS64 (res)));
+
+			*value = (n << 31) | (z << 30) | (c << 29) | (v <<
+								      28);
+			break;
+		}
+
+	default:
+		fprintf (stderr, "unknown opcode in DSPMRC5 0x%x\n", instr);
+		cirrus_not_implemented ("unknown");
+		break;
+	}
+
+	return ARMul_DONE;
+}
+
+unsigned
+DSPMRC6 (ARMul_State * state,
+	 unsigned type, ARMword instr, ARMword * value)
+{
+	switch (BITS (5, 7)) {
+	case 0:		/* cfmval32 */
+		cirrus_not_implemented ("cfmval32");
+		break;
+
+	case 1:		/* cfmvam32 */
+		cirrus_not_implemented ("cfmvam32");
+		break;
+
+	case 2:		/* cfmvah32 */
+		cirrus_not_implemented ("cfmvah32");
+		break;
+
+	case 3:		/* cfmva32 */
+		cirrus_not_implemented ("cfmva32");
+		break;
+
+	case 4:		/* cfmva64 */
+		cirrus_not_implemented ("cfmva64");
+		break;
+
+	case 5:		/* cfmvsc32 */
+		cirrus_not_implemented ("cfmvsc32");
+		break;
+
+	default:
+		fprintf (stderr, "unknown opcode in DSPMRC6 0x%x\n", instr);
+		cirrus_not_implemented ("unknown");
+		break;
+	}
+
+	return ARMul_DONE;
+}
+
+unsigned
+DSPMCR4 (ARMul_State * state,
+	 unsigned type, ARMword instr, ARMword value)
+{
+	switch (BITS (5, 7)) {
+	case 0:		/* cfmvdlr */
+		/* Move the lower half of a DF value from an Arm register into
+		   the lower half of a Cirrus register.  */
+		printfdbg ("cfmvdlr <-- 0x%x\n", (int) value);
+		DSPregs[SRC1_REG].lower.i = (int) value;
+		break;
+
+	case 1:		/* cfmvdhr */
+		/* Move the upper half of a DF value from an Arm register into
+		   the upper half of a Cirrus register.  */
+		printfdbg ("cfmvdhr <-- 0x%x\n", (int) value);
+		DSPregs[SRC1_REG].upper.i = (int) value;
+		break;
+
+	case 2:		/* cfmvsr */
+		/* Move SF from Arm register into upper half of Cirrus register.  */
+		printfdbg ("cfmvsr <-- 0x%x\n", (int) value);
+		DSPregs[SRC1_REG].upper.i = (int) value;
+		break;
+
+	default:
+		fprintf (stderr, "unknown opcode in DSPMCR4 0x%x\n", instr);
+		cirrus_not_implemented ("unknown");
+		break;
+	}
+
+	return ARMul_DONE;
+}
+
+unsigned
+DSPMCR5 (ARMul_State * state,
+	 unsigned type, ARMword instr, ARMword value)
+{
+	union
+	{
+		int s;
+		unsigned int us;
+	} val;
+
+	switch (BITS (5, 7)) {
+	case 0:		/* cfmv64lr */
+		/* Move lower half of a 64bit int from an ARM register into the
+		   lower half of a DSP register and sign extend it.  */
+		printfdbg ("cfmv64lr mvdx%d <-- 0x%x\n", SRC1_REG,
+			   (int) value);
+		DSPregs[SRC1_REG].lower.i = (int) value;
+		break;
+
+	case 1:		/* cfmv64hr */
+		/* Move upper half of a 64bit int from an ARM register into the
+		   upper half of a DSP register.  */
+		printfdbg ("cfmv64hr ARM_REG = mvfx%d <-- 0x%x\n",
+			   SRC1_REG, (int) value);
+		DSPregs[SRC1_REG].upper.i = (int) value;
+		break;
+
+	case 2:		/* cfrshl32 */
+		printfdbg ("cfrshl32\n");
+		val.us = value;
+		if (val.s > 0)
+			DSPregs[SRC2_REG].lower.i =
+				DSPregs[SRC1_REG].lower.i << value;
+		else
+			DSPregs[SRC2_REG].lower.i =
+				DSPregs[SRC1_REG].lower.i >> -value;
+		break;
+
+	case 3:		/* cfrshl64 */
+		printfdbg ("cfrshl64\n");
+		val.us = value;
+		if (val.s > 0)
+			mv_setReg64int (SRC2_REG,
+					mv_getReg64int (SRC1_REG) << value);
+		else
+			mv_setReg64int (SRC2_REG,
+					mv_getReg64int (SRC1_REG) >> -value);
+		break;
+
+	default:
+		fprintf (stderr, "unknown opcode in DSPMCR5 0x%x\n", instr);
+		cirrus_not_implemented ("unknown");
+		break;
+	}
+
+	return ARMul_DONE;
+}
+
+unsigned
+DSPMCR6 (ARMul_State * state,
+	 unsigned type, ARMword instr, ARMword value)
+{
+	switch (BITS (5, 7)) {
+	case 0:		/* cfmv32al */
+		cirrus_not_implemented ("cfmv32al");
+		break;
+
+	case 1:		/* cfmv32am */
+		cirrus_not_implemented ("cfmv32am");
+		break;
+
+	case 2:		/* cfmv32ah */
+		cirrus_not_implemented ("cfmv32ah");
+		break;
+
+	case 3:		/* cfmv32a */
+		cirrus_not_implemented ("cfmv32a");
+		break;
+
+	case 4:		/* cfmv64a */
+		cirrus_not_implemented ("cfmv64a");
+		break;
+
+	case 5:		/* cfmv32sc */
+		cirrus_not_implemented ("cfmv32sc");
+		break;
+
+	default:
+		fprintf (stderr, "unknown opcode in DSPMCR6 0x%x\n", instr);
+		cirrus_not_implemented ("unknown");
+		break;
+	}
+
+	return ARMul_DONE;
+}
+
+unsigned
+DSPLDC4 (ARMul_State * state,
+	 unsigned type, ARMword instr, ARMword data)
+{
+	static unsigned words;
+
+	if (type != ARMul_DATA) {
+		words = 0;
+		return ARMul_DONE;
+	}
+
+	if (BIT (22)) {		/* it's a long access, get two words */
+		/* cfldrd */
+
+		printfdbg
+			("cfldrd: %x (words = %d) (bigend = %d) DESTREG = %d\n",
+			 data, words, state->bigendSig, DEST_REG);
+
+		if (words == 0) {
+			if (state->bigendSig)
+				DSPregs[DEST_REG].upper.i = (int) data;
+			else
+				DSPregs[DEST_REG].lower.i = (int) data;
+		}
+		else {
+			if (state->bigendSig)
+				DSPregs[DEST_REG].lower.i = (int) data;
+			else
+				DSPregs[DEST_REG].upper.i = (int) data;
+		}
+
+		++words;
+
+		if (words == 2) {
+			printfdbg ("\tmvd%d <-- mem = %g\n", DEST_REG,
+				   mv_getRegDouble (DEST_REG));
+
+			return ARMul_DONE;
+		}
+		else
+			return ARMul_INC;
+	}
+	else {
+		/* Get just one word.  */
+
+		/* cfldrs */
+		printfdbg ("cfldrs\n");
+
+		DSPregs[DEST_REG].upper.i = (int) data;
+
+		printfdbg ("\tmvf%d <-- mem = %f\n", DEST_REG,
+			   DSPregs[DEST_REG].upper.f);
+
+		return ARMul_DONE;
+	}
+}
+
+unsigned
+DSPLDC5 (ARMul_State * state,
+	 unsigned type, ARMword instr, ARMword data)
+{
+	static unsigned words;
+
+	if (type != ARMul_DATA) {
+		words = 0;
+		return ARMul_DONE;
+	}
+
+	if (BIT (22)) {
+		/* It's a long access, get two words.  */
+
+		/* cfldr64 */
+		printfdbg ("cfldr64: %d\n", data);
+
+		if (words == 0) {
+			if (state->bigendSig)
+				DSPregs[DEST_REG].upper.i = (int) data;
+			else
+				DSPregs[DEST_REG].lower.i = (int) data;
+		}
+		else {
+			if (state->bigendSig)
+				DSPregs[DEST_REG].lower.i = (int) data;
+			else
+				DSPregs[DEST_REG].upper.i = (int) data;
+		}
+
+		++words;
+
+		if (words == 2) {
+			printfdbg ("\tmvdx%d <-- mem = %lld\n", DEST_REG,
+				   mv_getReg64int (DEST_REG));
+
+			return ARMul_DONE;
+		}
+		else
+			return ARMul_INC;
+	}
+	else {
+		/* Get just one word.  */
+
+		/* cfldr32 */
+		printfdbg ("cfldr32 mvfx%d <-- %d\n", DEST_REG, (int) data);
+
+		/* 32bit ints should be sign extended to 64bits when loaded.  */
+		mv_setReg64int (DEST_REG, (long long) data);
+
+		return ARMul_DONE;
+	}
+}
+
+unsigned
+DSPSTC4 (ARMul_State * state,
+	 unsigned type, ARMword instr, ARMword * data)
+{
+	static unsigned words;
+
+	if (type != ARMul_DATA) {
+		words = 0;
+		return ARMul_DONE;
+	}
+
+	if (BIT (22)) {
+		/* It's a long access, get two words.  */
+		/* cfstrd */
+		printfdbg ("cfstrd\n");
+
+		if (words == 0) {
+			if (state->bigendSig)
+				*data = (ARMword) DSPregs[DEST_REG].upper.i;
+			else
+				*data = (ARMword) DSPregs[DEST_REG].lower.i;
+		}
+		else {
+			if (state->bigendSig)
+				*data = (ARMword) DSPregs[DEST_REG].lower.i;
+			else
+				*data = (ARMword) DSPregs[DEST_REG].upper.i;
+		}
+
+		++words;
+
+		if (words == 2) {
+			printfdbg ("\tmem = mvd%d = %g\n", DEST_REG,
+				   mv_getRegDouble (DEST_REG));
+
+			return ARMul_DONE;
+		}
+		else
+			return ARMul_INC;
+	}
+	else {
+		/* Get just one word.  */
+		/* cfstrs */
+		printfdbg ("cfstrs mvf%d <-- %f\n", DEST_REG,
+			   DSPregs[DEST_REG].upper.f);
+
+		*data = (ARMword) DSPregs[DEST_REG].upper.i;
+
+		return ARMul_DONE;
+	}
+}
+
+unsigned
+DSPSTC5 (ARMul_State * state,
+	 unsigned type, ARMword instr, ARMword * data)
+{
+	static unsigned words;
+
+	if (type != ARMul_DATA) {
+		words = 0;
+		return ARMul_DONE;
+	}
+
+	if (BIT (22)) {
+		/* It's a long access, store two words.  */
+		/* cfstr64 */
+		printfdbg ("cfstr64\n");
+
+		if (words == 0) {
+			if (state->bigendSig)
+				*data = (ARMword) DSPregs[DEST_REG].upper.i;
+			else
+				*data = (ARMword) DSPregs[DEST_REG].lower.i;
+		}
+		else {
+			if (state->bigendSig)
+				*data = (ARMword) DSPregs[DEST_REG].lower.i;
+			else
+				*data = (ARMword) DSPregs[DEST_REG].upper.i;
+		}
+
+		++words;
+
+		if (words == 2) {
+			printfdbg ("\tmem = mvd%d = %lld\n", DEST_REG,
+				   mv_getReg64int (DEST_REG));
+
+			return ARMul_DONE;
+		}
+		else
+			return ARMul_INC;
+	}
+	else {
+		/* Store just one word.  */
+		/* cfstr32 */
+		*data = (ARMword) DSPregs[DEST_REG].lower.i;
+
+		printfdbg ("cfstr32 MEM = %d\n", (int) *data);
+
+		return ARMul_DONE;
+	}
+}
+
+unsigned
+DSPCDP4 (ARMul_State * state, unsigned type, ARMword instr)
+{
+	int opcode2;
+
+	opcode2 = BITS (5, 7);
+
+	switch (BITS (20, 21)) {
+	case 0:
+		switch (opcode2) {
+		case 0:	/* cfcpys */
+			printfdbg ("cfcpys mvf%d = mvf%d = %f\n",
+				   DEST_REG, SRC1_REG,
+				   DSPregs[SRC1_REG].upper.f);
+			DSPregs[DEST_REG].upper.f = DSPregs[SRC1_REG].upper.f;
+			break;
+
+		case 1:	/* cfcpyd */
+			printfdbg ("cfcpyd mvd%d = mvd%d = %g\n",
+				   DEST_REG, SRC1_REG,
+				   mv_getRegDouble (SRC1_REG));
+			mv_setRegDouble (DEST_REG,
+					 mv_getRegDouble (SRC1_REG));
+			break;
+
+		case 2:	/* cfcvtds */
+			printfdbg ("cfcvtds mvf%d = (float) mvd%d = %f\n",
+				   DEST_REG, SRC1_REG,
+				   (float) mv_getRegDouble (SRC1_REG));
+			DSPregs[DEST_REG].upper.f =
+				(float) mv_getRegDouble (SRC1_REG);
+			break;
+
+		case 3:	/* cfcvtsd */
+			printfdbg ("cfcvtsd mvd%d = mvf%d = %g\n",
+				   DEST_REG, SRC1_REG,
+				   (double) DSPregs[SRC1_REG].upper.f);
+			mv_setRegDouble (DEST_REG,
+					 (double) DSPregs[SRC1_REG].upper.f);
+			break;
+
+		case 4:	/* cfcvt32s */
+			printfdbg ("cfcvt32s mvf%d = mvfx%d = %f\n",
+				   DEST_REG, SRC1_REG,
+				   (float) DSPregs[SRC1_REG].lower.i);
+			DSPregs[DEST_REG].upper.f =
+				(float) DSPregs[SRC1_REG].lower.i;
+			break;
+
+		case 5:	/* cfcvt32d */
+			printfdbg ("cfcvt32d mvd%d = mvfx%d = %g\n",
+				   DEST_REG, SRC1_REG,
+				   (double) DSPregs[SRC1_REG].lower.i);
+			mv_setRegDouble (DEST_REG,
+					 (double) DSPregs[SRC1_REG].lower.i);
+			break;
+
+		case 6:	/* cfcvt64s */
+			printfdbg ("cfcvt64s mvf%d = mvdx%d = %f\n",
+				   DEST_REG, SRC1_REG,
+				   (float) mv_getReg64int (SRC1_REG));
+			DSPregs[DEST_REG].upper.f =
+				(float) mv_getReg64int (SRC1_REG);
+			break;
+
+		case 7:	/* cfcvt64d */
+			printfdbg ("cfcvt64d mvd%d = mvdx%d = %g\n",
+				   DEST_REG, SRC1_REG,
+				   (double) mv_getReg64int (SRC1_REG));
+			mv_setRegDouble (DEST_REG,
+					 (double) mv_getReg64int (SRC1_REG));
+			break;
+		}
+		break;
+
+	case 1:
+		switch (opcode2) {
+		case 0:	/* cfmuls */
+			printfdbg ("cfmuls mvf%d = mvf%d = %f\n",
+				   DEST_REG,
+				   SRC1_REG,
+				   DSPregs[SRC1_REG].upper.f *
+				   DSPregs[SRC2_REG].upper.f);
+
+			DSPregs[DEST_REG].upper.f = DSPregs[SRC1_REG].upper.f
+				* DSPregs[SRC2_REG].upper.f;
+			break;
+
+		case 1:	/* cfmuld */
+			printfdbg ("cfmuld mvd%d = mvd%d = %g\n",
+				   DEST_REG,
+				   SRC1_REG,
+				   mv_getRegDouble (SRC1_REG) *
+				   mv_getRegDouble (SRC2_REG));
+
+			mv_setRegDouble (DEST_REG, mv_getRegDouble (SRC1_REG)
+					 * mv_getRegDouble (SRC2_REG));
+			break;
+
+		default:
+			fprintf (stderr, "unknown opcode in DSPCDP4 0x%x\n",
+				 instr);
+			cirrus_not_implemented ("unknown");
+			break;
+		}
+		break;
+
+	case 3:
+		switch (opcode2) {
+		case 0:	/* cfabss */
+			DSPregs[DEST_REG].upper.f =
+				(DSPregs[SRC1_REG].upper.f <
+				 0.0F ? -DSPregs[SRC1_REG].upper.
+				 f : DSPregs[SRC1_REG].upper.f);
+			printfdbg ("cfabss mvf%d = |mvf%d| = %f\n", DEST_REG,
+				   SRC1_REG, DSPregs[DEST_REG].upper.f);
+			break;
+
+		case 1:	/* cfabsd */
+			mv_setRegDouble (DEST_REG,
+					 (mv_getRegDouble (SRC1_REG) < 0.0 ?
+					  -mv_getRegDouble (SRC1_REG)
+					  : mv_getRegDouble (SRC1_REG)));
+			printfdbg ("cfabsd mvd%d = |mvd%d| = %g\n",
+				   DEST_REG, SRC1_REG,
+				   mv_getRegDouble (DEST_REG));
+			break;
+
+		case 2:	/* cfnegs */
+			DSPregs[DEST_REG].upper.f =
+				-DSPregs[SRC1_REG].upper.f;
+			printfdbg ("cfnegs mvf%d = -mvf%d = %f\n", DEST_REG,
+				   SRC1_REG, DSPregs[DEST_REG].upper.f);
+			break;
+
+		case 3:	/* cfnegd */
+			mv_setRegDouble (DEST_REG,
+					 -mv_getRegDouble (SRC1_REG));
+			printfdbg ("cfnegd mvd%d = -mvd%d = %g\n", DEST_REG,
+				   mv_getRegDouble (DEST_REG));
+			break;
+
+		case 4:	/* cfadds */
+			DSPregs[DEST_REG].upper.f = DSPregs[SRC1_REG].upper.f
+				+ DSPregs[SRC2_REG].upper.f;
+			printfdbg ("cfadds mvf%d = mvf%d + mvf%d = %f\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   DSPregs[DEST_REG].upper.f);
+			break;
+
+		case 5:	/* cfaddd */
+			mv_setRegDouble (DEST_REG, mv_getRegDouble (SRC1_REG)
+					 + mv_getRegDouble (SRC2_REG));
+			printfdbg ("cfaddd: mvd%d = mvd%d + mvd%d = %g\n",
+				   DEST_REG,
+				   SRC1_REG, SRC2_REG,
+				   mv_getRegDouble (DEST_REG));
+			break;
+
+		case 6:	/* cfsubs */
+			DSPregs[DEST_REG].upper.f = DSPregs[SRC1_REG].upper.f
+				- DSPregs[SRC2_REG].upper.f;
+			printfdbg ("cfsubs: mvf%d = mvf%d - mvf%d = %f\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   DSPregs[DEST_REG].upper.f);
+			break;
+
+		case 7:	/* cfsubd */
+			mv_setRegDouble (DEST_REG, mv_getRegDouble (SRC1_REG)
+					 - mv_getRegDouble (SRC2_REG));
+			printfdbg ("cfsubd: mvd%d = mvd%d - mvd%d = %g\n",
+				   DEST_REG,
+				   SRC1_REG, SRC2_REG,
+				   mv_getRegDouble (DEST_REG));
+			break;
+		}
+		break;
+
+	default:
+		fprintf (stderr, "unknown opcode in DSPCDP4 0x%x\n", instr);
+		cirrus_not_implemented ("unknown");
+		break;
+	}
+
+	return ARMul_DONE;
+}
+
+unsigned
+DSPCDP5 (ARMul_State * state, unsigned type, ARMword instr)
+{
+	int opcode2;
+	char shift;
+
+	opcode2 = BITS (5, 7);
+
+	/* Shift constants are 7bit signed numbers in bits 0..3|5..7.  */
+	shift = BITS (0, 3) | (BITS (5, 7)) << 4;
+	if (shift & 0x40)
+		shift |= 0xc0;
+
+	switch (BITS (20, 21)) {
+	case 0:
+		/* cfsh32 */
+		printfdbg ("cfsh32 %s amount=%d\n",
+			   shift < 0 ? "right" : "left", shift);
+		if (shift < 0)
+			/* Negative shift is a right shift.  */
+			DSPregs[DEST_REG].lower.i =
+				DSPregs[SRC1_REG].lower.i >> -shift;
+		else
+			/* Positive shift is a left shift.  */
+			DSPregs[DEST_REG].lower.i =
+				DSPregs[SRC1_REG].lower.i << shift;
+		break;
+
+	case 1:
+		switch (opcode2) {
+		case 0:	/* cfmul32 */
+			DSPregs[DEST_REG].lower.i = DSPregs[SRC1_REG].lower.i
+				* DSPregs[SRC2_REG].lower.i;
+			printfdbg ("cfmul32 mvfx%d = mvfx%d * mvfx%d = %d\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   DSPregs[DEST_REG].lower.i);
+			break;
+
+		case 1:	/* cfmul64 */
+			mv_setReg64int (DEST_REG, mv_getReg64int (SRC1_REG)
+					* mv_getReg64int (SRC2_REG));
+			printfdbg
+				("cfmul64 mvdx%d = mvdx%d * mvdx%d = %lld\n",
+				 DEST_REG, SRC1_REG, SRC2_REG,
+				 mv_getReg64int (DEST_REG));
+			break;
+
+		case 2:	/* cfmac32 */
+			DSPregs[DEST_REG].lower.i
+				+=
+				DSPregs[SRC1_REG].lower.i *
+				DSPregs[SRC2_REG].lower.i;
+			printfdbg ("cfmac32 mvfx%d += mvfx%d * mvfx%d = %d\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   DSPregs[DEST_REG].lower.i);
+			break;
+
+		case 3:	/* cfmsc32 */
+			DSPregs[DEST_REG].lower.i
+				-=
+				DSPregs[SRC1_REG].lower.i *
+				DSPregs[SRC2_REG].lower.i;
+			printfdbg ("cfmsc32 mvfx%d -= mvfx%d * mvfx%d = %d\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   DSPregs[DEST_REG].lower.i);
+			break;
+
+		case 4:	/* cfcvts32 */
+			/* fixme: this should round */
+			DSPregs[DEST_REG].lower.i =
+				(int) DSPregs[SRC1_REG].upper.f;
+			printfdbg ("cfcvts32 mvfx%d = mvf%d = %d\n", DEST_REG,
+				   SRC1_REG, DSPregs[DEST_REG].lower.i);
+			break;
+
+		case 5:	/* cfcvtd32 */
+			/* fixme: this should round */
+			DSPregs[DEST_REG].lower.i =
+				(int) mv_getRegDouble (SRC1_REG);
+			printfdbg ("cfcvtd32 mvdx%d = mvd%d = %d\n", DEST_REG,
+				   SRC1_REG, DSPregs[DEST_REG].lower.i);
+			break;
+
+		case 6:	/* cftruncs32 */
+			DSPregs[DEST_REG].lower.i =
+				(int) DSPregs[SRC1_REG].upper.f;
+			printfdbg ("cftruncs32 mvfx%d = mvf%d = %d\n",
+				   DEST_REG, SRC1_REG,
+				   DSPregs[DEST_REG].lower.i);
+			break;
+
+		case 7:	/* cftruncd32 */
+			DSPregs[DEST_REG].lower.i =
+				(int) mv_getRegDouble (SRC1_REG);
+			printfdbg ("cftruncd32 mvfx%d = mvd%d = %d\n",
+				   DEST_REG, SRC1_REG,
+				   DSPregs[DEST_REG].lower.i);
+			break;
+		}
+		break;
+
+	case 2:
+		/* cfsh64 */
+		printfdbg ("cfsh64\n");
+
+		if (shift < 0)
+			/* Negative shift is a right shift.  */
+			mv_setReg64int (DEST_REG,
+					mv_getReg64int (SRC1_REG) >> -shift);
+		else
+			/* Positive shift is a left shift.  */
+			mv_setReg64int (DEST_REG,
+					mv_getReg64int (SRC1_REG) << shift);
+		printfdbg ("\t%llx\n", mv_getReg64int (DEST_REG));
+		break;
+
+	case 3:
+		switch (opcode2) {
+		case 0:	/* cfabs32 */
+			DSPregs[DEST_REG].lower.i =
+				(DSPregs[SRC1_REG].lower.i <
+				 0 ? -DSPregs[SRC1_REG].lower.
+				 i : DSPregs[SRC1_REG].lower.i);
+			printfdbg ("cfabs32 mvfx%d = |mvfx%d| = %d\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   DSPregs[DEST_REG].lower.i);
+			break;
+
+		case 1:	/* cfabs64 */
+			mv_setReg64int (DEST_REG,
+					(mv_getReg64int (SRC1_REG) < 0
+					 ? -mv_getReg64int (SRC1_REG)
+					 : mv_getReg64int (SRC1_REG)));
+			printfdbg ("cfabs64 mvdx%d = |mvdx%d| = %lld\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   mv_getReg64int (DEST_REG));
+			break;
+
+		case 2:	/* cfneg32 */
+			DSPregs[DEST_REG].lower.i =
+				-DSPregs[SRC1_REG].lower.i;
+			printfdbg ("cfneg32 mvfx%d = -mvfx%d = %d\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   DSPregs[DEST_REG].lower.i);
+			break;
+
+		case 3:	/* cfneg64 */
+			mv_setReg64int (DEST_REG, -mv_getReg64int (SRC1_REG));
+			printfdbg ("cfneg64 mvdx%d = -mvdx%d = %lld\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   mv_getReg64int (DEST_REG));
+			break;
+
+		case 4:	/* cfadd32 */
+			DSPregs[DEST_REG].lower.i = DSPregs[SRC1_REG].lower.i
+				+ DSPregs[SRC2_REG].lower.i;
+			printfdbg ("cfadd32 mvfx%d = mvfx%d + mvfx%d = %d\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   DSPregs[DEST_REG].lower.i);
+			break;
+
+		case 5:	/* cfadd64 */
+			mv_setReg64int (DEST_REG, mv_getReg64int (SRC1_REG)
+					+ mv_getReg64int (SRC2_REG));
+			printfdbg
+				("cfadd64 mvdx%d = mvdx%d + mvdx%d = %lld\n",
+				 DEST_REG, SRC1_REG, SRC2_REG,
+				 mv_getReg64int (DEST_REG));
+			break;
+
+		case 6:	/* cfsub32 */
+			DSPregs[DEST_REG].lower.i = DSPregs[SRC1_REG].lower.i
+				- DSPregs[SRC2_REG].lower.i;
+			printfdbg ("cfsub32 mvfx%d = mvfx%d - mvfx%d = %d\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   DSPregs[DEST_REG].lower.i);
+			break;
+
+		case 7:	/* cfsub64 */
+			mv_setReg64int (DEST_REG, mv_getReg64int (SRC1_REG)
+					- mv_getReg64int (SRC2_REG));
+			printfdbg ("cfsub64 mvdx%d = mvdx%d - mvdx%d = %d\n",
+				   DEST_REG, SRC1_REG, SRC2_REG,
+				   mv_getReg64int (DEST_REG));
+			break;
+		}
+		break;
+
+	default:
+		fprintf (stderr, "unknown opcode in DSPCDP5 0x%x\n", instr);
+		cirrus_not_implemented ("unknown");
+		break;
+	}
+
+	return ARMul_DONE;
+}
+
+unsigned
+DSPCDP6 (ARMul_State * state, unsigned type, ARMword instr)
+{
+	int opcode2;
+
+	opcode2 = BITS (5, 7);
+
+	switch (BITS (20, 21)) {
+	case 0:
+		/* cfmadd32 */
+		cirrus_not_implemented ("cfmadd32");
+		break;
+
+	case 1:
+		/* cfmsub32 */
+		cirrus_not_implemented ("cfmsub32");
+		break;
+
+	case 2:
+		/* cfmadda32 */
+		cirrus_not_implemented ("cfmadda32");
+		break;
+
+	case 3:
+		/* cfmsuba32 */
+		cirrus_not_implemented ("cfmsuba32");
+		break;
+
+	default:
+		fprintf (stderr, "unknown opcode in DSPCDP6 0x%x\n", instr);
+	}
+
+	return ARMul_DONE;
+}
+
+/* Conversion functions.
+
+   32-bit integers are stored in the LOWER half of a 64-bit physical
+   register.
+
+   Single precision floats are stored in the UPPER half of a 64-bit
+   physical register.  */
+
+static double
+mv_getRegDouble (int regnum)
+{
+	reg_conv.ints[lsw_float_index] = DSPregs[regnum].upper.i;
+	reg_conv.ints[msw_float_index] = DSPregs[regnum].lower.i;
+	return reg_conv.d;
+}
+
+static void
+mv_setRegDouble (int regnum, double val)
+{
+	reg_conv.d = val;
+	DSPregs[regnum].upper.i = reg_conv.ints[lsw_float_index];
+	DSPregs[regnum].lower.i = reg_conv.ints[msw_float_index];
+}
+
+static long long
+mv_getReg64int (int regnum)
+{
+	reg_conv.ints[lsw_int_index] = DSPregs[regnum].lower.i;
+	reg_conv.ints[msw_int_index] = DSPregs[regnum].upper.i;
+	return reg_conv.ll;
+}
+
+static void
+mv_setReg64int (int regnum, long long val)
+{
+	reg_conv.ll = val;
+	DSPregs[regnum].lower.i = reg_conv.ints[lsw_int_index];
+	DSPregs[regnum].upper.i = reg_conv.ints[msw_int_index];
+}
+
+/* Compute LSW in a double and a long long.  */
+
+void
+mv_compute_host_endianness (ARMul_State * state)
+{
+	static union
+	{
+		long long ll;
+		int ints[2];
+		int i;
+		double d;
+		float floats[2];
+		float f;
+	} conv;
+
+	/* Calculate where's the LSW in a 64bit int.  */
+	conv.ll = 45;
+
+	if (conv.ints[0] == 0) {
+		msw_int_index = 0;
+		lsw_int_index = 1;
+	}
+	else {
+		assert (conv.ints[1] == 0);
+		msw_int_index = 1;
+		lsw_int_index = 0;
+	}
+
+	/* Calculate where's the LSW in a double.  */
+	conv.d = 3.0;
+
+	if (conv.ints[0] == 0) {
+		msw_float_index = 0;
+		lsw_float_index = 1;
+	}
+	else {
+		assert (conv.ints[1] == 0);
+		msw_float_index = 1;
+		lsw_float_index = 0;
+	}
+
+	printfdbg ("lsw_int_index   %d\n", lsw_int_index);
+	printfdbg ("lsw_float_index %d\n", lsw_float_index);
+}
diff --git a/src/core/arm/interpreter/mmu/rb.cpp b/src/core/arm/interpreter/mmu/rb.cpp
new file mode 100644
index 000000000..07b11e311
--- /dev/null
+++ b/src/core/arm/interpreter/mmu/rb.cpp
@@ -0,0 +1,126 @@
+#include "core/arm/interpreter/armdefs.h"
+
+/*chy 2004-06-06, fix bug found by wenye@cs.ucsb.edu*/
+ARMword rb_masks[] = {
+	0x0,			//RB_INVALID
+	4,			//RB_1
+	16,			//RB_4
+	32,			//RB_8
+};
+
+/*mmu_rb_init
+ * @rb_t	:rb_t to init
+ * @num		:number of entry
+ * */
+int
+mmu_rb_init (rb_s * rb_t, int num)
+{
+	int i;
+	rb_entry_t *entrys;
+
+	entrys = (rb_entry_t *) malloc (sizeof (*entrys) * num);
+	if (entrys == NULL) {
+		printf ("SKYEYE:mmu_rb_init malloc error\n");
+		return -1;
+	}
+	for (i = 0; i < num; i++) {
+		entrys[i].type = RB_INVALID;
+		entrys[i].fault = NO_FAULT;
+	}
+
+	rb_t->entrys = entrys;
+	rb_t->num = num;
+	return 0;
+}
+
+/*mmu_rb_exit*/
+void
+mmu_rb_exit (rb_s * rb_t)
+{
+	free (rb_t->entrys);
+};
+
+/*mmu_rb_search
+ * @rb_t	:rb_t to serach
+ * @va		:va address to math
+ *
+ * $	NULL :not match
+ * 		NO-NULL:
+ * */
+rb_entry_t *
+mmu_rb_search (rb_s * rb_t, ARMword va)
+{
+	int i;
+	rb_entry_t *rb = rb_t->entrys;
+
+	DEBUG_LOG(ARM11, "va = %x\n", va);
+	for (i = 0; i < rb_t->num; i++, rb++) {
+		//2004-06-06 lyh  bug from wenye@cs.ucsb.edu
+		if (rb->type) {
+			if ((va >= rb->va)
+			    && (va < (rb->va + rb_masks[rb->type])))
+				return rb;
+		}
+	}
+	return NULL;
+};
+
+void
+mmu_rb_invalidate_entry (rb_s * rb_t, int i)
+{
+	rb_t->entrys[i].type = RB_INVALID;
+}
+
+void
+mmu_rb_invalidate_all (rb_s * rb_t)
+{
+	int i;
+
+	for (i = 0; i < rb_t->num; i++)
+		mmu_rb_invalidate_entry (rb_t, i);
+};
+
+void
+mmu_rb_load (ARMul_State * state, rb_s * rb_t, int i_rb, int type, ARMword va)
+{
+	rb_entry_t *rb;
+	int i;
+	ARMword max_start, min_end;
+	fault_t fault;
+	tlb_entry_t *tlb;
+
+	/*align va according to type */
+	va &= ~(rb_masks[type] - 1);
+	/*invalidate all RB match [va, va + rb_masks[type]] */
+	for (rb = rb_t->entrys, i = 0; i < rb_t->num; i++, rb++) {
+		if (rb->type) {
+			max_start = max (va, rb->va);
+			min_end =
+				min (va + rb_masks[type],
+				     rb->va + rb_masks[rb->type]);
+			if (max_start < min_end)
+				rb->type = RB_INVALID;
+		}
+	}
+	/*load word */
+	rb = &rb_t->entrys[i_rb];
+	rb->type = type;
+	fault = translate (state, va, D_TLB (), &tlb);
+	if (fault) {
+		rb->fault = fault;
+		return;
+	}
+	fault = check_access (state, va, tlb, 1);
+	if (fault) {
+		rb->fault = fault;
+		return;
+	}
+
+	rb->fault = NO_FAULT;
+	va = tlb_va_to_pa (tlb, va);
+	//2004-06-06 lyh  bug from wenye@cs.ucsb.edu
+	for (i = 0; i < (rb_masks[type] / 4); i++, va += WORD_SIZE) {
+		//rb->data[i] = mem_read_word (state, va);
+		bus_read(32, va, &rb->data[i]);
+	};
+}
diff --git a/src/core/arm/mmu/rb.h b/src/core/arm/interpreter/mmu/rb.h
index 7bf0ebb26..7bf0ebb26 100644
--- a/src/core/arm/mmu/rb.h
+++ b/src/core/arm/interpreter/mmu/rb.h
diff --git a/src/core/arm/interpreter/mmu/sa_mmu.cpp b/src/core/arm/interpreter/mmu/sa_mmu.cpp
new file mode 100644
index 000000000..eff5002de
--- /dev/null
+++ b/src/core/arm/interpreter/mmu/sa_mmu.cpp
@@ -0,0 +1,864 @@
+/*
+    armmmu.c - Memory Management Unit emulation.
+    ARMulator extensions for the ARM7100 family.
+    Copyright (C) 1999  Ben Williamson
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <assert.h>
+#include <string.h>
+
+#include "core/arm/interpreter/armdefs.h"
+
+/**
+ *  The interface of read data from bus
+ */
+int bus_read(short size, int addr, uint32_t * value) {
+    ERROR_LOG(ARM11, "unimplemented bus_read");
+    return 0;
+}
+
+/**
+ * The interface of write data from bus
+ */
+int bus_write(short size, int addr, uint32_t value) {
+    ERROR_LOG(ARM11, "unimplemented bus_write");
+    return 0;
+}
+
+
+typedef struct sa_mmu_desc_s
+{
+	int i_tlb;
+	cache_desc_t i_cache;
+
+	int d_tlb;
+	cache_desc_t main_d_cache;
+	cache_desc_t mini_d_cache;
+	int rb;
+	wb_desc_t wb;
+} sa_mmu_desc_t;
+
+static sa_mmu_desc_t sa11xx_mmu_desc = {
+	32,
+	{32, 32, 16, CACHE_WRITE_BACK},
+
+	32,
+	{32, 32, 8, CACHE_WRITE_BACK},
+	{32, 2, 8, CACHE_WRITE_BACK},
+	4,
+	//{8, 4},  for word size 
+	{8, 16},		//for byte size,   chy 2003-07-11
+};
+
+static fault_t sa_mmu_write (ARMul_State * state, ARMword va, ARMword data,
+			     ARMword datatype);
+static fault_t sa_mmu_read (ARMul_State * state, ARMword va, ARMword * data,
+			    ARMword datatype);
+static fault_t update_cache (ARMul_State * state, ARMword va, ARMword data,
+			     ARMword datatype, cache_line_t * cache,
+			     cache_s * cache_t, ARMword real_va);
+
+void
+mmu_wb_write_bytes (ARMul_State * state, wb_s * wb_t, ARMword pa,
+		    ARMbyte * data, int n);
+int
+sa_mmu_init (ARMul_State * state)
+{
+	sa_mmu_desc_t *desc;
+	cache_desc_t *c_desc;
+
+	state->mmu.control = 0x70;
+	state->mmu.translation_table_base = 0xDEADC0DE;
+	state->mmu.domain_access_control = 0xDEADC0DE;
+	state->mmu.fault_status = 0;
+	state->mmu.fault_address = 0;
+	state->mmu.process_id = 0;
+
+	desc = &sa11xx_mmu_desc;
+	if (mmu_tlb_init (I_TLB (), desc->i_tlb)) {
+		ERROR_LOG(ARM11, "i_tlb init %d\n", -1);
+		goto i_tlb_init_error;
+	}
+
+	c_desc = &desc->i_cache;
+	if (mmu_cache_init (I_CACHE (), c_desc->width, c_desc->way,
+			    c_desc->set, c_desc->w_mode)) {
+		ERROR_LOG(ARM11, "i_cache init %d\n", -1);
+		goto i_cache_init_error;
+	}
+
+	if (mmu_tlb_init (D_TLB (), desc->d_tlb)) {
+		ERROR_LOG(ARM11, "d_tlb init %d\n", -1);
+		goto d_tlb_init_error;
+	}
+
+	c_desc = &desc->main_d_cache;
+	if (mmu_cache_init (MAIN_D_CACHE (), c_desc->width, c_desc->way,
+			    c_desc->set, c_desc->w_mode)) {
+		ERROR_LOG(ARM11, "main_d_cache init %d\n", -1);
+		goto main_d_cache_init_error;
+	}
+
+	c_desc = &desc->mini_d_cache;
+	if (mmu_cache_init (MINI_D_CACHE (), c_desc->width, c_desc->way,
+			    c_desc->set, c_desc->w_mode)) {
+		ERROR_LOG(ARM11, "mini_d_cache init %d\n", -1);
+		goto mini_d_cache_init_error;
+	}
+
+	if (mmu_wb_init (WB (), desc->wb.num, desc->wb.nb)) {
+		ERROR_LOG(ARM11, "wb init %d\n", -1);
+		goto wb_init_error;
+	}
+
+	if (mmu_rb_init (RB (), desc->rb)) {
+		ERROR_LOG(ARM11, "rb init %d\n", -1);
+		goto rb_init_error;
+	}
+	return 0;
+
+      rb_init_error:
+	mmu_wb_exit (WB ());
+      wb_init_error:
+	mmu_cache_exit (MINI_D_CACHE ());
+      mini_d_cache_init_error:
+	mmu_cache_exit (MAIN_D_CACHE ());
+      main_d_cache_init_error:
+	mmu_tlb_exit (D_TLB ());
+      d_tlb_init_error:
+	mmu_cache_exit (I_CACHE ());
+      i_cache_init_error:
+	mmu_tlb_exit (I_TLB ());
+      i_tlb_init_error:
+	return -1;
+}
+
+void
+sa_mmu_exit (ARMul_State * state)
+{
+	mmu_rb_exit (RB ());
+	mmu_wb_exit (WB ());
+	mmu_cache_exit (MINI_D_CACHE ());
+	mmu_cache_exit (MAIN_D_CACHE ());
+	mmu_tlb_exit (D_TLB ());
+	mmu_cache_exit (I_CACHE ());
+	mmu_tlb_exit (I_TLB ());
+};
+
+
+static fault_t
+sa_mmu_load_instr (ARMul_State * state, ARMword va, ARMword * instr)
+{
+	fault_t fault;
+	tlb_entry_t *tlb;
+	cache_line_t *cache;
+	int c;			//cache bit
+	ARMword pa;		//physical addr
+
+	static int debug_count = 0;	//used for debug
+
+	DEBUG_LOG(ARM11, "va = %x\n", va);
+
+	va = mmu_pid_va_map (va);
+	if (MMU_Enabled) {
+		/*align check */
+		if ((va & (WORD_SIZE - 1)) && MMU_Aligned) {
+			DEBUG_LOG(ARM11, "align\n");
+			return ALIGNMENT_FAULT;
+		}
+		else
+			va &= ~(WORD_SIZE - 1);
+
+		/*translate tlb */
+		fault = translate (state, va, I_TLB (), &tlb);
+		if (fault) {
+			DEBUG_LOG(ARM11, "translate\n");
+			return fault;
+		}
+
+		/*check access */
+		fault = check_access (state, va, tlb, 1);
+		if (fault) {
+			DEBUG_LOG(ARM11, "check_fault\n");
+			return fault;
+		}
+	}
+
+	/*search cache no matter MMU enabled/disabled */
+	cache = mmu_cache_search (state, I_CACHE (), va);
+	if (cache) {
+		*instr = cache->data[va_cache_index (va, I_CACHE ())];
+		return NO_FAULT;
+	}
+
+	/*if MMU disabled or C flag is set alloc cache */
+	if (MMU_Disabled) {
+		c = 1;
+		pa = va;
+	}
+	else {
+		c = tlb_c_flag (tlb);
+		pa = tlb_va_to_pa (tlb, va);
+	}
+
+	if (c) {
+		int index;
+
+		debug_count++;
+		cache = mmu_cache_alloc (state, I_CACHE (), va, pa);
+		index = va_cache_index (va, I_CACHE ());
+		*instr = cache->data[va_cache_index (va, I_CACHE ())];
+	}
+	else
+		//*instr = mem_read_word (state, pa);
+		bus_read(32, pa, instr);
+
+	return NO_FAULT;
+};
+
+
+
+static fault_t
+sa_mmu_read_byte (ARMul_State * state, ARMword virt_addr, ARMword * data)
+{
+	//ARMword temp,offset;
+	fault_t fault;
+	fault = sa_mmu_read (state, virt_addr, data, ARM_BYTE_TYPE);
+	return fault;
+}
+
+static fault_t
+sa_mmu_read_halfword (ARMul_State * state, ARMword virt_addr, ARMword * data)
+{
+	//ARMword temp,offset;
+	fault_t fault;
+	fault = sa_mmu_read (state, virt_addr, data, ARM_HALFWORD_TYPE);
+	return fault;
+}
+
+static fault_t
+sa_mmu_read_word (ARMul_State * state, ARMword virt_addr, ARMword * data)
+{
+	return sa_mmu_read (state, virt_addr, data, ARM_WORD_TYPE);
+}
+
+
+
+
+static fault_t
+sa_mmu_read (ARMul_State * state, ARMword va, ARMword * data,
+	     ARMword datatype)
+{
+	fault_t fault;
+	rb_entry_t *rb;
+	tlb_entry_t *tlb;
+	cache_line_t *cache;
+	ARMword pa, real_va, temp, offset;
+
+	DEBUG_LOG(ARM11, "va = %x\n", va);
+
+	va = mmu_pid_va_map (va);
+	real_va = va;
+	/*if MMU disabled, memory_read */
+	if (MMU_Disabled) {
+		//*data = mem_read_word(state, va);
+		if (datatype == ARM_BYTE_TYPE)
+			//*data = mem_read_byte (state, va);
+			bus_read(8, va, data);
+		else if (datatype == ARM_HALFWORD_TYPE)
+			//*data = mem_read_halfword (state, va);
+			bus_read(16, va, data);
+		else if (datatype == ARM_WORD_TYPE)
+			//*data = mem_read_word (state, va);
+			bus_read(32, va, data);
+		else {
+			printf ("SKYEYE:1 sa_mmu_read error: unknown data type %d\n", datatype);
+			// skyeye_exit (-1);
+		}
+
+		return NO_FAULT;
+	}
+
+	/*align check */
+	if (((va & 3) && (datatype == ARM_WORD_TYPE) && MMU_Aligned) ||
+	    ((va & 1) && (datatype == ARM_HALFWORD_TYPE) && MMU_Aligned)) {
+		DEBUG_LOG(ARM11, "align\n");
+		return ALIGNMENT_FAULT;
+	}			// else
+
+	va &= ~(WORD_SIZE - 1);
+
+	/*translate va to tlb */
+	fault = translate (state, va, D_TLB (), &tlb);
+	if (fault) {
+		DEBUG_LOG(ARM11, "translate\n");
+		return fault;
+	}
+	/*check access permission */
+	fault = check_access (state, va, tlb, 1);
+	if (fault)
+		return fault;
+	/*search in read buffer */
+	rb = mmu_rb_search (RB (), va);
+	if (rb) {
+		if (rb->fault)
+			return rb->fault;
+		*data = rb->data[(va & (rb_masks[rb->type] - 1)) >> WORD_SHT];
+		goto datatrans;
+		//return 0;
+	};
+	/*search main cache */
+	cache = mmu_cache_search (state, MAIN_D_CACHE (), va);
+	if (cache) {
+		*data = cache->data[va_cache_index (va, MAIN_D_CACHE ())];
+		goto datatrans;
+		//return 0;
+	}
+	/*search mini cache */
+	cache = mmu_cache_search (state, MINI_D_CACHE (), va);
+	if (cache) {
+		*data = cache->data[va_cache_index (va, MINI_D_CACHE ())];
+		goto datatrans;
+		//return 0;
+	}
+
+	/*get phy_addr */
+	pa = tlb_va_to_pa (tlb, va);
+	if ((pa >= 0xe0000000) && (pa < 0xe8000000)) {
+		if (tlb_c_flag (tlb)) {
+			if (tlb_b_flag (tlb)) {
+				mmu_cache_soft_flush (state, MAIN_D_CACHE (),
+						      pa);
+			}
+			else {
+				mmu_cache_soft_flush (state, MINI_D_CACHE (),
+						      pa);
+			}
+		}
+		return NO_FAULT;
+	}
+
+	/*if Buffer, drain Write Buffer first */
+	if (tlb_b_flag (tlb))
+		mmu_wb_drain_all (state, WB ());
+
+	/*alloc cache or mem_read */
+	if (tlb_c_flag (tlb) && MMU_DCacheEnabled) {
+		cache_s *cache_t;
+
+		if (tlb_b_flag (tlb))
+			cache_t = MAIN_D_CACHE ();
+		else
+			cache_t = MINI_D_CACHE ();
+		cache = mmu_cache_alloc (state, cache_t, va, pa);
+		*data = cache->data[va_cache_index (va, cache_t)];
+	}
+	else {
+		//*data = mem_read_word(state, pa);
+		if (datatype == ARM_BYTE_TYPE)
+			//*data = mem_read_byte (state, pa | (real_va & 3));
+			bus_read(8, pa | (real_va & 3), data);
+		else if (datatype == ARM_HALFWORD_TYPE)
+			//*data = mem_read_halfword (state, pa | (real_va & 2));
+			bus_read(16, pa | (real_va & 2), data);
+		else if (datatype == ARM_WORD_TYPE)
+			//*data = mem_read_word (state, pa);
+			bus_read(32, pa, data);
+		else {
+			printf ("SKYEYE:2 sa_mmu_read error: unknown data type %d\n", datatype);
+			// skyeye_exit (-1);
+		}
+		return NO_FAULT;
+	}
+
+
+      datatrans:
+	if (datatype == ARM_HALFWORD_TYPE) {
+		temp = *data;
+		offset = (((ARMword) state->bigendSig * 2) ^ (real_va & 2)) << 3;	/* bit offset into the word */
+		*data = (temp >> offset) & 0xffff;
+	}
+	else if (datatype == ARM_BYTE_TYPE) {
+		temp = *data;
+		offset = (((ARMword) state->bigendSig * 3) ^ (real_va & 3)) << 3;	/* bit offset into the word */
+		*data = (temp >> offset & 0xffL);
+	}
+      end:
+	return NO_FAULT;
+}
+
+
+static fault_t
+sa_mmu_write_byte (ARMul_State * state, ARMword virt_addr, ARMword data)
+{
+	return sa_mmu_write (state, virt_addr, data, ARM_BYTE_TYPE);
+}
+
+static fault_t
+sa_mmu_write_halfword (ARMul_State * state, ARMword virt_addr, ARMword data)
+{
+	return sa_mmu_write (state, virt_addr, data, ARM_HALFWORD_TYPE);
+}
+
+static fault_t
+sa_mmu_write_word (ARMul_State * state, ARMword virt_addr, ARMword data)
+{
+	return sa_mmu_write (state, virt_addr, data, ARM_WORD_TYPE);
+}
+
+
+
+static fault_t
+sa_mmu_write (ARMul_State * state, ARMword va, ARMword data, ARMword datatype)
+{
+	tlb_entry_t *tlb;
+	cache_line_t *cache;
+	int b;
+	ARMword pa, real_va;
+	fault_t fault;
+
+	DEBUG_LOG(ARM11, "va = %x, val = %x\n", va, data);
+	va = mmu_pid_va_map (va);
+	real_va = va;
+
+	/*search instruction cache */
+	cache = mmu_cache_search (state, I_CACHE (), va);
+	if (cache) {
+		update_cache (state, va, data, datatype, cache, I_CACHE (),
+			      real_va);
+	}
+
+	if (MMU_Disabled) {
+		//mem_write_word(state, va, data);
+		if (datatype == ARM_BYTE_TYPE)
+			//mem_write_byte (state, va, data);
+			bus_write(8, va, data);
+		else if (datatype == ARM_HALFWORD_TYPE)
+			//mem_write_halfword (state, va, data);
+			bus_write(16, va, data);
+		else if (datatype == ARM_WORD_TYPE)
+			//mem_write_word (state, va, data);
+			bus_write(32, va, data);
+		else {
+			printf ("SKYEYE:1 sa_mmu_write error: unknown data type %d\n", datatype);
+			// skyeye_exit (-1);
+		}
+
+		return NO_FAULT;
+	}
+	/*align check */
+	//if ((va & (WORD_SIZE - 1)) && MMU_Aligned){
+	if (((va & 3) && (datatype == ARM_WORD_TYPE) && MMU_Aligned) ||
+	    ((va & 1) && (datatype == ARM_HALFWORD_TYPE) && MMU_Aligned)) {
+		DEBUG_LOG(ARM11, "align\n");
+		return ALIGNMENT_FAULT;
+	}			//else
+	va &= ~(WORD_SIZE - 1);
+	/*tlb translate */
+	fault = translate (state, va, D_TLB (), &tlb);
+	if (fault) {
+		DEBUG_LOG(ARM11, "translate\n");
+		return fault;
+	}
+	/*tlb check access */
+	fault = check_access (state, va, tlb, 0);
+	if (fault) {
+		DEBUG_LOG(ARM11, "check_access\n");
+		return fault;
+	}
+	/*search main cache */
+	cache = mmu_cache_search (state, MAIN_D_CACHE (), va);
+	if (cache) {
+		update_cache (state, va, data, datatype, cache,
+			      MAIN_D_CACHE (), real_va);
+	}
+	else {
+		/*search mini cache */
+		cache = mmu_cache_search (state, MINI_D_CACHE (), va);
+		if (cache) {
+			update_cache (state, va, data, datatype, cache,
+				      MINI_D_CACHE (), real_va);
+		}
+	}
+
+	if (!cache) {
+		b = tlb_b_flag (tlb);
+		pa = tlb_va_to_pa (tlb, va);
+		if (b) {
+			if (MMU_WBEnabled) {
+				if (datatype == ARM_WORD_TYPE)
+					mmu_wb_write_bytes (state, WB (), pa,
+							    (ARMbyte*)&data, 4);
+				else if (datatype == ARM_HALFWORD_TYPE)
+					mmu_wb_write_bytes (state, WB (),
+							    (pa |
+							     (real_va & 2)),
+							    (ARMbyte*)&data, 2);
+				else if (datatype == ARM_BYTE_TYPE)
+					mmu_wb_write_bytes (state, WB (),
+							    (pa |
+							     (real_va & 3)),
+							    (ARMbyte*)&data, 1);
+
+			}
+			else {
+				if (datatype == ARM_WORD_TYPE)
+					//mem_write_word (state, pa, data);
+					bus_write(32, pa, data);
+				else if (datatype == ARM_HALFWORD_TYPE)
+					/*
+					mem_write_halfword (state,
+							    (pa |
+							     (real_va & 2)),
+							    data);
+					*/
+					bus_write(16, pa | (real_va & 2), data);
+				else if (datatype == ARM_BYTE_TYPE)
+					/*
+					mem_write_byte (state,
+							(pa | (real_va & 3)),
+							data);
+					*/
+					bus_write(8, pa | (real_va & 3), data);
+			}
+		}
+		else {
+			mmu_wb_drain_all (state, WB ());
+
+			if (datatype == ARM_WORD_TYPE)
+				//mem_write_word (state, pa, data);
+				bus_write(32, pa, data);
+			else if (datatype == ARM_HALFWORD_TYPE)
+				/*
+					mem_write_halfword (state,
+						    (pa | (real_va & 2)),
+						    data);
+				*/
+				bus_write(16, pa | (real_va & 2), data);
+			else if (datatype == ARM_BYTE_TYPE)
+				/*
+				mem_write_byte (state, (pa | (real_va & 3)),
+						data);
+				*/
+				bus_write(8, pa | (real_va & 3), data);
+		}
+	}
+	return NO_FAULT;
+}
+
+static fault_t
+update_cache (ARMul_State * state, ARMword va, ARMword data, ARMword datatype,
+	      cache_line_t * cache, cache_s * cache_t, ARMword real_va)
+{
+	ARMword temp, offset;
+
+	ARMword index = va_cache_index (va, cache_t);
+
+	//cache->data[index] = data;
+
+	if (datatype == ARM_WORD_TYPE)
+		cache->data[index] = data;
+	else if (datatype == ARM_HALFWORD_TYPE) {
+		temp = cache->data[index];
+		offset = (((ARMword) state->bigendSig * 2) ^ (real_va & 2)) << 3;	/* bit offset into the word */
+		cache->data[index] =
+			(temp & ~(0xffffL << offset)) | ((data & 0xffffL) <<
+							 offset);
+	}
+	else if (datatype == ARM_BYTE_TYPE) {
+		temp = cache->data[index];
+		offset = (((ARMword) state->bigendSig * 3) ^ (real_va & 3)) << 3;	/* bit offset into the word */
+		cache->data[index] =
+			(temp & ~(0xffL << offset)) | ((data & 0xffL) <<
+						       offset);
+	}
+
+	if (index < (cache_t->width >> (WORD_SHT + 1)))
+		cache->tag |= TAG_FIRST_HALF_DIRTY;
+	else
+		cache->tag |= TAG_LAST_HALF_DIRTY;
+
+	return NO_FAULT;
+}
+
+ARMword
+sa_mmu_mrc (ARMul_State * state, ARMword instr, ARMword * value)
+{
+	mmu_regnum_t creg = (mmu_regnum_t)(BITS (16, 19) & 15);
+	ARMword data;
+
+	switch (creg) {
+	case MMU_ID:
+//              printf("mmu_mrc read ID     ");
+		data = 0x41007100;	/* v3 */
+		data = state->cpu->cpu_val;
+		break;
+	case MMU_CONTROL:
+//              printf("mmu_mrc read CONTROL");
+		data = state->mmu.control;
+		break;
+	case MMU_TRANSLATION_TABLE_BASE:
+//              printf("mmu_mrc read TTB    ");
+		data = state->mmu.translation_table_base;
+		break;
+	case MMU_DOMAIN_ACCESS_CONTROL:
+//              printf("mmu_mrc read DACR   ");
+		data = state->mmu.domain_access_control;
+		break;
+	case MMU_FAULT_STATUS:
+//              printf("mmu_mrc read FSR    ");
+		data = state->mmu.fault_status;
+		break;
+	case MMU_FAULT_ADDRESS:
+//              printf("mmu_mrc read FAR    ");
+		data = state->mmu.fault_address;
+		break;
+	case MMU_PID:
+		data = state->mmu.process_id;
+	default:
+		printf ("mmu_mrc read UNKNOWN - reg %d\n", creg);
+		data = 0;
+		break;
+	}
+//      printf("\t\t\t\t\tpc = 0x%08x\n", state->Reg[15]);
+	*value = data;
+	return data;
+}
+
+void
+sa_mmu_cache_ops (ARMul_State * state, ARMword instr, ARMword value)
+{
+	int CRm, OPC_2;
+
+	CRm = BITS (0, 3);
+	OPC_2 = BITS (5, 7);
+
+	if (OPC_2 == 0 && CRm == 7) {
+		mmu_cache_invalidate_all (state, I_CACHE ());
+		mmu_cache_invalidate_all (state, MAIN_D_CACHE ());
+		mmu_cache_invalidate_all (state, MINI_D_CACHE ());
+		return;
+	}
+
+	if (OPC_2 == 0 && CRm == 5) {
+		mmu_cache_invalidate_all (state, I_CACHE ());
+		return;
+	}
+
+	if (OPC_2 == 0 && CRm == 6) {
+		mmu_cache_invalidate_all (state, MAIN_D_CACHE ());
+		mmu_cache_invalidate_all (state, MINI_D_CACHE ());
+		return;
+	}
+
+	if (OPC_2 == 1 && CRm == 6) {
+		mmu_cache_invalidate (state, MAIN_D_CACHE (), value);
+		mmu_cache_invalidate (state, MINI_D_CACHE (), value);
+		return;
+	}
+
+	if (OPC_2 == 1 && CRm == 0xa) {
+		mmu_cache_clean (state, MAIN_D_CACHE (), value);
+		mmu_cache_clean (state, MINI_D_CACHE (), value);
+		return;
+	}
+
+	if (OPC_2 == 4 && CRm == 0xa) {
+		mmu_wb_drain_all (state, WB ());
+		return;
+	}
+	ERROR_LOG(ARM11, "Unknow OPC_2 = %x CRm = %x\n", OPC_2, CRm);
+}
+
+static void
+sa_mmu_tlb_ops (ARMul_State * state, ARMword instr, ARMword value)
+{
+	int CRm, OPC_2;
+
+	CRm = BITS (0, 3);
+	OPC_2 = BITS (5, 7);
+
+
+	if (OPC_2 == 0 && CRm == 0x7) {
+		mmu_tlb_invalidate_all (state, I_TLB ());
+		mmu_tlb_invalidate_all (state, D_TLB ());
+		return;
+	}
+
+	if (OPC_2 == 0 && CRm == 0x5) {
+		mmu_tlb_invalidate_all (state, I_TLB ());
+		return;
+	}
+
+	if (OPC_2 == 0 && CRm == 0x6) {
+		mmu_tlb_invalidate_all (state, D_TLB ());
+		return;
+	}
+
+	if (OPC_2 == 1 && CRm == 0x6) {
+		mmu_tlb_invalidate_entry (state, D_TLB (), value);
+		return;
+	}
+
+	ERROR_LOG(ARM11, "Unknow OPC_2 = %x CRm = %x\n", OPC_2, CRm);
+}
+
+static void
+sa_mmu_rb_ops (ARMul_State * state, ARMword instr, ARMword value)
+{
+	int CRm, OPC_2;
+
+	CRm = BITS (0, 3);
+	OPC_2 = BITS (5, 7);
+
+	if (OPC_2 == 0x0 && CRm == 0x0) {
+		mmu_rb_invalidate_all (RB ());
+		return;
+	}
+
+	if (OPC_2 == 0x2) {
+		int idx = CRm & 0x3;
+		int type = ((CRm >> 2) & 0x3) + 1;
+
+		if ((idx < 4) && (type < 4))
+			mmu_rb_load (state, RB (), idx, type, value);
+		return;
+	}
+
+	if ((OPC_2 == 1) && (CRm < 4)) {
+		mmu_rb_invalidate_entry (RB (), CRm);
+		return;
+	}
+
+	ERROR_LOG(ARM11, "Unknow OPC_2 = %x CRm = %x\n", OPC_2, CRm);
+}
+
+static ARMword
+sa_mmu_mcr (ARMul_State * state, ARMword instr, ARMword value)
+{
+	mmu_regnum_t creg = (mmu_regnum_t)(BITS (16, 19) & 15);
+	if (!strncmp (state->cpu->cpu_arch_name, "armv4", 5)) {
+		switch (creg) {
+		case MMU_CONTROL:
+//              printf("mmu_mcr wrote CONTROL      ");
+			state->mmu.control = (value | 0x70) & 0xFFFD;
+			break;
+		case MMU_TRANSLATION_TABLE_BASE:
+//              printf("mmu_mcr wrote TTB          ");
+			state->mmu.translation_table_base =
+				value & 0xFFFFC000;
+			break;
+		case MMU_DOMAIN_ACCESS_CONTROL:
+//              printf("mmu_mcr wrote DACR         ");
+			state->mmu.domain_access_control = value;
+			break;
+
+		case MMU_FAULT_STATUS:
+			state->mmu.fault_status = value & 0xFF;
+			break;
+		case MMU_FAULT_ADDRESS:
+			state->mmu.fault_address = value;
+			break;
+
+		case MMU_CACHE_OPS:
+			sa_mmu_cache_ops (state, instr, value);
+			break;
+		case MMU_TLB_OPS:
+			sa_mmu_tlb_ops (state, instr, value);
+			break;
+		case MMU_SA_RB_OPS:
+			sa_mmu_rb_ops (state, instr, value);
+			break;
+		case MMU_SA_DEBUG:
+			break;
+		case MMU_SA_CP15_R15:
+			break;
+		case MMU_PID:
+			//2004-06-06 lyh, bug provided by wen ye wenye@cs.ucsb.edu
+			state->mmu.process_id = value & 0x7e000000;
+			break;
+
+		default:
+			printf ("mmu_mcr wrote UNKNOWN - reg %d\n", creg);
+			break;
+		}
+	}
+    return 0;
+}
+
+//teawater add for arm2x86 2005.06.24-------------------------------------------
+static int
+sa_mmu_v2p_dbct (ARMul_State * state, ARMword virt_addr, ARMword * phys_addr)
+{
+	fault_t fault;
+	tlb_entry_t *tlb;
+
+	virt_addr = mmu_pid_va_map (virt_addr);
+	if (MMU_Enabled) {
+
+		/*align check */
+		if ((virt_addr & (WORD_SIZE - 1)) && MMU_Aligned) {
+			DEBUG_LOG(ARM11, "align\n");
+			return ALIGNMENT_FAULT;
+		}
+		else
+			virt_addr &= ~(WORD_SIZE - 1);
+
+		/*translate tlb */
+		fault = translate (state, virt_addr, I_TLB (), &tlb);
+		if (fault) {
+			DEBUG_LOG(ARM11, "translate\n");
+			return fault;
+		}
+
+		/*check access */
+		fault = check_access (state, virt_addr, tlb, 1);
+		if (fault) {
+			DEBUG_LOG(ARM11, "check_fault\n");
+			return fault;
+		}
+	}
+
+	if (MMU_Disabled) {
+		*phys_addr = virt_addr;
+	}
+	else {
+		*phys_addr = tlb_va_to_pa (tlb, virt_addr);
+	}
+
+	return (0);
+}
+
+//AJ2D--------------------------------------------------------------------------
+
+/*sa mmu_ops_t*/
+mmu_ops_t sa_mmu_ops = {
+	sa_mmu_init,
+	sa_mmu_exit,
+	sa_mmu_read_byte,
+	sa_mmu_write_byte,
+	sa_mmu_read_halfword,
+	sa_mmu_write_halfword,
+	sa_mmu_read_word,
+	sa_mmu_write_word,
+	sa_mmu_load_instr,
+	sa_mmu_mcr,
+	sa_mmu_mrc,
+//teawater add for arm2x86 2005.06.24-------------------------------------------
+	sa_mmu_v2p_dbct,
+//AJ2D--------------------------------------------------------------------------
+};
diff --git a/src/core/arm/interpreter/mmu/sa_mmu.h b/src/core/arm/interpreter/mmu/sa_mmu.h
new file mode 100644
index 000000000..64b1c5470
--- /dev/null
+++ b/src/core/arm/interpreter/mmu/sa_mmu.h
@@ -0,0 +1,58 @@
+/*
+    sa_mmu.h - StrongARM Memory Management Unit emulation.
+    ARMulator extensions for SkyEye.
+	<lyhost@263.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifndef _SA_MMU_H_
+#define _SA_MMU_H_
+
+
+/**
+ *  The interface of read data from bus
+ */
+int bus_read(short size, int addr, uint32_t * value);
+
+/**
+ * The interface of write data from bus
+ */
+int bus_write(short size, int addr, uint32_t value);
+
+
+typedef struct sa_mmu_s
+{
+	tlb_s i_tlb;
+	cache_s i_cache;
+
+	tlb_s d_tlb;
+	cache_s main_d_cache;
+	cache_s mini_d_cache;
+	rb_s rb_t;
+	wb_s wb_t;
+} sa_mmu_t;
+
+#define I_TLB() (&state->mmu.u.sa_mmu.i_tlb)
+#define I_CACHE() (&state->mmu.u.sa_mmu.i_cache)
+
+#define D_TLB() (&state->mmu.u.sa_mmu.d_tlb)
+#define MAIN_D_CACHE() (&state->mmu.u.sa_mmu.main_d_cache)
+#define MINI_D_CACHE() (&state->mmu.u.sa_mmu.mini_d_cache)
+#define WB() (&state->mmu.u.sa_mmu.wb_t)
+#define RB() (&state->mmu.u.sa_mmu.rb_t)
+
+extern mmu_ops_t sa_mmu_ops;
+#endif /*_SA_MMU_H_*/
diff --git a/src/core/arm/interpreter/mmu/tlb.cpp b/src/core/arm/interpreter/mmu/tlb.cpp
new file mode 100644
index 000000000..ca60ac1a1
--- /dev/null
+++ b/src/core/arm/interpreter/mmu/tlb.cpp
@@ -0,0 +1,307 @@
+#include <assert.h>
+
+#include "core/arm/interpreter/armdefs.h"
+
+ARMword tlb_masks[] = {
+	0x00000000,		/* TLB_INVALID */
+	0xFFFFF000,		/* TLB_SMALLPAGE */
+	0xFFFF0000,		/* TLB_LARGEPAGE */
+	0xFFF00000,		/* TLB_SECTION */
+	0xFFFFF000,		/*TLB_ESMALLPAGE, have TEX attirbute, only for XScale */
+	0xFFFFFC00		/* TLB_TINYPAGE */
+};
+
+/* This function encodes table 8-2 Interpreting AP bits,
+   returning non-zero if access is allowed. */
+static int
+check_perms (ARMul_State * state, int ap, int read)
+{
+	int s, r, user;
+
+	s = state->mmu.control & CONTROL_SYSTEM;
+	r = state->mmu.control & CONTROL_ROM;
+	//chy 2006-02-15 , should consider system mode, don't conside 26bit mode
+	user = (state->Mode == USER32MODE) || (state->Mode == USER26MODE) || (state->Mode == SYSTEM32MODE);
+
+	switch (ap) {
+	case 0:
+		return read && ((s && !user) || r);
+	case 1:
+		return !user;
+	case 2:
+		return read || !user;
+	case 3:
+		return 1;
+	}
+	return 0;
+}
+
+fault_t
+check_access (ARMul_State * state, ARMword virt_addr, tlb_entry_t * tlb,
+	      int read)
+{
+	int access;
+
+	state->mmu.last_domain = tlb->domain;
+	access = (state->mmu.domain_access_control >> (tlb->domain * 2)) & 3;
+	if ((access == 0) || (access == 2)) {
+		/* It's unclear from the documentation whether this
+		   should always raise a section domain fault, or if
+		   it should be a page domain fault in the case of an
+		   L1 that describes a page table.  In the ARM710T
+		   datasheets, "Figure 8-9: Sequence for checking faults"
+		   seems to indicate the former, while "Table 8-4: Priority
+		   encoding of fault status" gives a value for FS[3210] in
+		   the event of a domain fault for a page.  Hmm. */
+		return SECTION_DOMAIN_FAULT;
+	}
+	if (access == 1) {
+		/* client access - check perms */
+		int subpage, ap;
+
+		switch (tlb->mapping) {
+			/*ks 2004-05-09   
+			 *   only for XScale
+			 *   Extend Small Page(ESP) Format
+			 *   31-12 bits    the base addr of ESP
+			 *   11-10 bits    SBZ
+			 *   9-6   bits    TEX
+			 *   5-4   bits    AP
+			 *   3     bit     C
+			 *   2     bit     B
+			 *   1-0   bits    11
+			 * */
+		case TLB_ESMALLPAGE:	//xj
+			subpage = 0;
+			//printf("TLB_ESMALLPAGE virt_addr=0x%x  \n",virt_addr );
+			break;
+
+		case TLB_TINYPAGE:
+			subpage = 0;
+			//printf("TLB_TINYPAGE virt_addr=0x%x  \n",virt_addr );
+			break;
+
+		case TLB_SMALLPAGE:
+			subpage = (virt_addr >> 10) & 3;
+			break;
+		case TLB_LARGEPAGE:
+			subpage = (virt_addr >> 14) & 3;
+			break;
+		case TLB_SECTION:
+			subpage = 3;
+			break;
+		default:
+			assert (0);
+			subpage = 0;	/* cleans a warning */
+		}
+		ap = (tlb->perms >> (subpage * 2 + 4)) & 3;
+		if (!check_perms (state, ap, read)) {
+			if (tlb->mapping == TLB_SECTION) {
+				return SECTION_PERMISSION_FAULT;
+			}
+			else {
+				return SUBPAGE_PERMISSION_FAULT;
+			}
+		}
+	}
+	else {			/* access == 3 */
+		/* manager access - don't check perms */
+	}
+	return NO_FAULT;
+}
+
+fault_t
+translate (ARMul_State * state, ARMword virt_addr, tlb_s * tlb_t,
+	   tlb_entry_t ** tlb)
+{
+	*tlb = mmu_tlb_search (state, tlb_t, virt_addr);
+	if (!*tlb) {
+		/* walk the translation tables */
+		ARMword l1addr, l1desc;
+		tlb_entry_t entry;
+
+		l1addr = state->mmu.translation_table_base & 0xFFFFC000;
+		l1addr = (l1addr | (virt_addr >> 18)) & ~3;
+		//l1desc = mem_read_word (state, l1addr);
+		bus_read(32, l1addr, &l1desc);
+		switch (l1desc & 3) {
+		case 0:
+			/* 
+			 * according to Figure 3-9 Sequence for checking faults in arm manual,
+			 * section translation fault should be returned here. 
+			 */
+			{
+				return SECTION_TRANSLATION_FAULT;
+			}
+		case 3:
+			/* fine page table */
+			// dcl 2006-01-08
+			{
+				ARMword l2addr, l2desc;
+
+				l2addr = l1desc & 0xFFFFF000;
+				l2addr = (l2addr |
+					  ((virt_addr & 0x000FFC00) >> 8)) &
+					~3;
+				//l2desc = mem_read_word (state, l2addr);				 
+				bus_read(32, l2addr, &l2desc);
+
+				entry.virt_addr = virt_addr;
+				entry.phys_addr = l2desc;
+				entry.perms = l2desc & 0x00000FFC;
+				entry.domain = (l1desc >> 5) & 0x0000000F;
+				switch (l2desc & 3) {
+				case 0:
+					state->mmu.last_domain = entry.domain;
+					return PAGE_TRANSLATION_FAULT;
+				case 3:
+					entry.mapping = TLB_TINYPAGE;
+					break;
+				case 1:
+					// this is untested
+					entry.mapping = TLB_LARGEPAGE;
+					break;
+				case 2:
+					// this is untested
+					entry.mapping = TLB_SMALLPAGE;
+					break;
+				}
+			}
+			break;
+		case 1:
+			/* coarse page table */
+			{
+				ARMword l2addr, l2desc;
+
+				l2addr = l1desc & 0xFFFFFC00;
+				l2addr = (l2addr |
+					  ((virt_addr & 0x000FF000) >> 10)) &
+					~3;
+				//l2desc = mem_read_word (state, l2addr);
+				bus_read(32, l2addr, &l2desc);
+
+				entry.virt_addr = virt_addr;
+				entry.phys_addr = l2desc;
+				entry.perms = l2desc & 0x00000FFC;
+				entry.domain = (l1desc >> 5) & 0x0000000F;
+				//printf("SKYEYE:PAGE virt_addr = %x,l1desc=%x,phys_addr=%x\n",virt_addr,l1desc,entry.phys_addr);
+				//chy 2003-09-02 for xscale
+				switch (l2desc & 3) {
+				case 0:
+					state->mmu.last_domain = entry.domain;
+					return PAGE_TRANSLATION_FAULT;
+				case 3:
+					if (!state->is_XScale) {
+						state->mmu.last_domain =
+							entry.domain;
+						return PAGE_TRANSLATION_FAULT;
+					};
+					//ks 2004-05-09 xscale shold use Extend Small Page
+					//entry.mapping = TLB_SMALLPAGE;
+					entry.mapping = TLB_ESMALLPAGE;	//xj
+					break;
+				case 1:
+					entry.mapping = TLB_LARGEPAGE;
+					break;
+				case 2:
+					entry.mapping = TLB_SMALLPAGE;
+					break;
+				}
+			}
+			break;
+		case 2:
+			/* section */
+			//printf("SKYEYE:WARNING: not implement section mapping incompletely\n");
+			//printf("SKYEYE:SECTION virt_addr = %x,l1desc=%x\n",virt_addr,l1desc);
+			//return SECTION_DOMAIN_FAULT;
+			//#if 0
+			entry.virt_addr = virt_addr;
+			entry.phys_addr = l1desc;
+			entry.perms = l1desc & 0x00000C0C;
+			entry.domain = (l1desc >> 5) & 0x0000000F;
+			entry.mapping = TLB_SECTION;
+			break;
+			//#endif
+		}
+		entry.virt_addr &= tlb_masks[entry.mapping];
+		entry.phys_addr &= tlb_masks[entry.mapping];
+
+		/* place entry in the tlb */
+		*tlb = &tlb_t->entrys[tlb_t->cycle];
+		tlb_t->cycle = (tlb_t->cycle + 1) % tlb_t->num;
+		**tlb = entry;
+	}
+	state->mmu.last_domain = (*tlb)->domain;
+	return NO_FAULT;
+}
+
+int
+mmu_tlb_init (tlb_s * tlb_t, int num)
+{
+	tlb_entry_t *e;
+	int i;
+
+	e = (tlb_entry_t *) malloc (sizeof (*e) * num);
+	if (e == NULL) {
+		ERROR_LOG(ARM11, "malloc size %d\n", sizeof (*e) * num);
+		goto tlb_malloc_error;
+	}
+	tlb_t->entrys = e;
+	for (i = 0; i < num; i++, e++)
+		e->mapping = TLB_INVALID;
+	tlb_t->cycle = 0;
+	tlb_t->num = num;
+	return 0;
+
+      tlb_malloc_error:
+	return -1;
+}
+
+void
+mmu_tlb_exit (tlb_s * tlb_t)
+{
+	free (tlb_t->entrys);
+};
+
+void
+mmu_tlb_invalidate_all (ARMul_State * state, tlb_s * tlb_t)
+{
+	int entry;
+
+	for (entry = 0; entry < tlb_t->num; entry++) {
+		tlb_t->entrys[entry].mapping = TLB_INVALID;
+	}
+	tlb_t->cycle = 0;
+}
+
+void
+mmu_tlb_invalidate_entry (ARMul_State * state, tlb_s * tlb_t, ARMword addr)
+{
+	tlb_entry_t *tlb;
+
+	tlb = mmu_tlb_search (state, tlb_t, addr);
+	if (tlb) {
+		tlb->mapping = TLB_INVALID;
+	}
+}
+
+tlb_entry_t *
+mmu_tlb_search (ARMul_State * state, tlb_s * tlb_t, ARMword virt_addr)
+{
+	int entry;
+
+	for (entry = 0; entry < tlb_t->num; entry++) {
+		tlb_entry_t *tlb;
+		ARMword mask;
+
+		tlb = &(tlb_t->entrys[entry]);
+		if (tlb->mapping == TLB_INVALID) {
+			continue;
+		}
+		mask = tlb_masks[tlb->mapping];
+		if ((virt_addr & mask) == (tlb->virt_addr & mask)) {
+			return tlb;
+		}
+	}
+	return NULL;
+}
diff --git a/src/core/arm/mmu/tlb.h b/src/core/arm/interpreter/mmu/tlb.h
index 938c01786..40856567b 100644
--- a/src/core/arm/mmu/tlb.h
+++ b/src/core/arm/interpreter/mmu/tlb.h
@@ -63,14 +63,7 @@ typedef struct tlb_s
 #define tlb_b_flag(tlb) \
     ((tlb)->perms & 0x4)
 
-#define  tlb_va_to_pa(tlb, va) \
-(\
- {\
-    ARMword mask = tlb_masks[tlb->mapping];      \
-    (tlb->phys_addr & mask) | (va & ~mask);\
- }\
-)
-
+#define  tlb_va_to_pa(tlb, va) ((tlb->phys_addr & tlb_masks[tlb->mapping]) | (va & ~tlb_masks[tlb->mapping]))
 fault_t
 check_access (ARMul_State * state, ARMword virt_addr, tlb_entry_t * tlb,
           int read);
diff --git a/src/core/arm/interpreter/mmu/wb.cpp b/src/core/arm/interpreter/mmu/wb.cpp
new file mode 100644
index 000000000..82c0cec02
--- /dev/null
+++ b/src/core/arm/interpreter/mmu/wb.cpp
@@ -0,0 +1,149 @@
+#include "core/arm/interpreter/armdefs.h"
+
+/* wb_init
+ * @wb_t	:wb_t to init
+ * @num		:num of entrys
+ * @nb		:num of byte of each entry
+ *
+ * $	-1:error
+ * 		 0:ok
+ * */
+int
+mmu_wb_init (wb_s * wb_t, int num, int nb)
+{
+	int i;
+	wb_entry_t *entrys, *wb;
+
+	entrys = (wb_entry_t *) malloc (sizeof (*entrys) * num);
+	if (entrys == NULL) {
+		ERROR_LOG(ARM11, "malloc size %d\n", sizeof (*entrys) * num);
+		goto entrys_malloc_error;
+	}
+
+	for (wb = entrys, i = 0; i < num; i++, wb++) {
+		/*chy 2004-06-06, fix bug found by wenye@cs.ucsb.edu */
+		//wb->data = (ARMword *)malloc(sizeof(ARMword) * nb);
+		wb->data = (ARMbyte *) malloc (nb);
+		if (wb->data == NULL) {
+			ERROR_LOG(ARM11, "malloc size of %d\n", nb);
+			goto data_malloc_error;
+		}
+
+	};
+
+	wb_t->first = wb_t->last = wb_t->used = 0;
+	wb_t->num = num;
+	wb_t->nb = nb;
+	wb_t->entrys = entrys;
+	return 0;
+
+      data_malloc_error:
+	while (--i >= 0)
+		free (entrys[i].data);
+	free (entrys);
+      entrys_malloc_error:
+	return -1;
+};
+
+/* wb_exit
+ * @wb_t :wb_t to exit
+ * */
+void
+mmu_wb_exit (wb_s * wb_t)
+{
+	int i;
+	wb_entry_t *wb;
+
+	wb = wb_t->entrys;
+	for (i = 0; i < wb_t->num; i++, wb++) {
+		free (wb->data);
+	}
+	free (wb_t->entrys);
+};
+
+/* wb_write_words :put words in Write Buffer
+ * @state:	ARMul_State
+ * @wb_t:	write buffer
+ * @pa:		physical address
+ * @data:	data ptr
+ * @n		number of word to write
+ *
+ * Note: write buffer merge is not implemented, can be done late
+ * */
+void
+mmu_wb_write_bytes (ARMul_State * state, wb_s * wb_t, ARMword pa,
+		    ARMbyte * data, int n)
+{
+	int i;
+	wb_entry_t *wb;
+
+	while (n) {
+		if (wb_t->num == wb_t->used) {
+			/*clean the last wb entry */
+			ARMword t;
+
+			wb = &wb_t->entrys[wb_t->last];
+			t = wb->pa;
+			for (i = 0; i < wb->nb; i++) {
+				//mem_write_byte (state, t, wb->data[i]);
+				bus_write(8, t, wb->data[i]);
+				//t += WORD_SIZE;
+				t++;
+			}
+			wb_t->last++;
+			if (wb_t->last == wb_t->num)
+				wb_t->last = 0;
+			wb_t->used--;
+		}
+
+		wb = &wb_t->entrys[wb_t->first];
+		i = (n < wb_t->nb) ? n : wb_t->nb;
+
+		wb->pa = pa;
+		//pa += i << WORD_SHT;
+		pa += i;
+
+		wb->nb = i;
+		//memcpy(wb->data, data, i << WORD_SHT);
+		memcpy (wb->data, data, i);
+		data += i;
+		n -= i;
+		wb_t->first++;
+		if (wb_t->first == wb_t->num)
+			wb_t->first = 0;
+		wb_t->used++;
+	};
+//teawater add for set_dirty fflash cache function 2005.07.18-------------------
+#ifdef DBCT
+	if (!skyeye_config.no_dbct) {
+		tb_setdirty (state, pa, NULL);
+	}
+#endif
+//AJ2D--------------------------------------------------------------------------
+}
+
+/* wb_drain_all
+ * @wb_t wb_t to drain
+ * */
+void
+mmu_wb_drain_all (ARMul_State * state, wb_s * wb_t)
+{
+	ARMword pa;
+	wb_entry_t *wb;
+	int i;
+
+	while (wb_t->used) {
+		wb = &wb_t->entrys[wb_t->last];
+		pa = wb->pa;
+		for (i = 0; i < wb->nb; i++) {
+			//mem_write_byte (state, pa, wb->data[i]);
+			bus_write(8, pa, wb->data[i]);
+			//pa += WORD_SIZE;
+			pa++;
+		}
+		wb_t->last++;
+		if (wb_t->last == wb_t->num)
+			wb_t->last = 0;
+		wb_t->used--;
+	};
+}
diff --git a/src/core/arm/mmu/wb.h b/src/core/arm/interpreter/mmu/wb.h
index 8fb7de946..8fb7de946 100644
--- a/src/core/arm/mmu/wb.h
+++ b/src/core/arm/interpreter/mmu/wb.h
diff --git a/src/core/arm/interpreter/mmu/xscale_copro.cpp b/src/core/arm/interpreter/mmu/xscale_copro.cpp
new file mode 100644
index 000000000..433ce8e02
--- /dev/null
+++ b/src/core/arm/interpreter/mmu/xscale_copro.cpp
@@ -0,0 +1,1391 @@
+/*
+    armmmu.c - Memory Management Unit emulation.
+    ARMulator extensions for the ARM7100 family.
+    Copyright (C) 1999  Ben Williamson
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <assert.h>
+#include <string.h>
+
+#include "core/arm/interpreter/armdefs.h"
+#include "core/arm/interpreter/armemu.h"
+
+/*#include "pxa.h" */
+
+/* chy 2005-09-19 */
+
+/* extern pxa270_io_t pxa270_io; */
+/* chy 2005-09-19 -----end */
+
+typedef struct xscale_mmu_desc_s
+{
+	int i_tlb;
+	cache_desc_t i_cache;
+
+	int d_tlb;
+	cache_desc_t main_d_cache;
+	cache_desc_t mini_d_cache;
+	//int   rb;  xscale has no read buffer
+	wb_desc_t wb;
+} xscale_mmu_desc_t;
+
+static xscale_mmu_desc_t pxa_mmu_desc = {
+	32,
+	{32, 32, 32, CACHE_WRITE_BACK},
+
+	32,
+	{32, 32, 32, CACHE_WRITE_BACK},
+	{32, 2, 8, CACHE_WRITE_BACK},
+	{8, 16},		//for byte size, 
+};
+
+//chy 2005-09-19 for cp6
+#define CR0_ICIP   0
+#define CR1_ICMR   1
+//chy 2005-09-19 ---end
+//----------- for cp14-----------------
+#define  CCLKCFG   6
+#define  PWRMODE   7
+typedef struct xscale_cp14_reg_s
+{
+	unsigned cclkcfg;	//reg6
+	unsigned pwrmode;	//reg7
+} xscale_cp14_reg_s;
+
+xscale_cp14_reg_s pxa_cp14_regs;
+
+//--------------------------------------
+
+static fault_t xscale_mmu_write (ARMul_State * state, ARMword va,
+				 ARMword data, ARMword datatype);
+static fault_t xscale_mmu_read (ARMul_State * state, ARMword va,
+				ARMword * data, ARMword datatype);
+
+ARMword xscale_mmu_mrc (ARMul_State * state, ARMword instr, ARMword * value);
+ARMword xscale_mmu_mcr (ARMul_State * state, ARMword instr, ARMword value);
+
+
+/* jeff add 2010.9.26 for pxa270 cp6*/
+#define PXA270_ICMR 0x40D00004
+#define PXA270_ICPR 0x40D00010
+#define PXA270_ICLR 0x40D00008
+//chy 2005-09-19 for xscale pxa27x cp6
+unsigned
+xscale_cp6_mrc (ARMul_State * state, unsigned type, ARMword instr,
+		ARMword * data)
+{
+	unsigned opcode_2 = BITS (5, 7);
+	unsigned CRm = BITS (0, 3);
+	unsigned reg = BITS (16, 19);
+	unsigned result;
+
+	//printf("SKYEYE: xscale_cp6_mrc:opcode_2 0x%x, CRm 0x%x, reg 0x%x,reg[15] 0x%x, instr %x\n",opcode_2,CRm,reg,state->Reg[15], instr);
+
+	switch (reg) {
+	case CR0_ICIP: {		// cp 6 reg 0
+		//printf("cp6_mrc cr0 ICIP              \n");
+		/* *data = (pxa270_io.icmr & pxa270_io.icpr) & ~pxa270_io.iclr; */
+		/* use bus_read get the pxa270 machine registers  2010.9.26 jeff*/
+		int icmr, icpr, iclr;
+		bus_read(32, PXA270_ICMR, (uint32_t*)&icmr);
+		bus_read(32, PXA270_ICPR, (uint32_t*)&icpr);
+		bus_read(32, PXA270_ICLR, (uint32_t*)&iclr);
+		*data = (icmr & icpr)  & ~iclr;
+		}
+		break;
+	case CR1_ICMR: {	// cp 6 reg 1
+		//printf("cp6_mrc cr1 ICMR\n");
+		/* *data = pxa270_io.icmr; */
+		int icmr;
+		/* use bus_read get the pxa270 machine registers  2010.9.26 jeff*/
+		bus_read(32, PXA270_ICMR, (uint32_t*)&icmr);
+		*data = icmr;
+		}
+		break;
+	default:
+		*data = 0;
+		printf ("SKYEYE:cp6_mrc unknown cp6 regs!!!!!!\n");
+		printf ("SKYEYE: xscale_cp6_mrc:opcode_2 0x%x, CRm 0x%x, reg 0x%x,reg[15] 0x%x, instr %x\n", opcode_2, CRm, reg, state->Reg[15], instr);
+		break;
+	}
+	return 0;
+}
+
+//chy 2005-09-19 end
+//xscale cp13 ----------------------------------------------------
+unsigned
+xscale_cp13_init (ARMul_State * state)
+{
+	//printf("SKYEYE: xscale_cp13_init: begin\n");
+	return 0;
+}
+
+unsigned
+xscale_cp13_exit (ARMul_State * state)
+{
+	//printf("SKYEYE: xscale_cp13_exit: begin\n");
+	return 0;
+}
+
+unsigned
+xscale_cp13_ldc (ARMul_State * state, unsigned type, ARMword instr,
+		 ARMword data)
+{
+	printf ("SKYEYE: xscale_cp13_ldc: ERROR isn't existed,");
+	SKYEYE_OUTREGS (stderr);
+	fprintf (stderr, "\n");
+	// skyeye_exit (-1);
+	return 0; //No matter return value, only for compiler.
+}
+
+unsigned
+xscale_cp13_stc (ARMul_State * state, unsigned type, ARMword instr,
+		 ARMword * data)
+{
+	printf ("SKYEYE: xscale_cp13_stc: ERROR isn't existed,");
+	SKYEYE_OUTREGS (stderr);
+	fprintf (stderr, "\n");
+	// skyeye_exit (-1);
+	return 0; //No matter return value, only for compiler.
+}
+
+unsigned
+xscale_cp13_mrc (ARMul_State * state, unsigned type, ARMword instr,
+		 ARMword * data)
+{
+	printf ("SKYEYE: xscale_cp13_mrc: ERROR isn't existed,");
+	SKYEYE_OUTREGS (stderr);
+	fprintf (stderr, "\n");
+	// skyeye_exit (-1);
+	return 0; //No matter return value, only for compiler.
+}
+
+unsigned
+xscale_cp13_mcr (ARMul_State * state, unsigned type, ARMword instr,
+		 ARMword data)
+{
+	printf ("SKYEYE: xscale_cp13_mcr: ERROR isn't existed,");
+	SKYEYE_OUTREGS (stderr);
+	fprintf (stderr, "\n");
+	// skyeye_exit (-1);
+	return 0; //No matter return value, only for compiler.
+}
+
+unsigned
+xscale_cp13_cdp (ARMul_State * state, unsigned type, ARMword instr)
+{
+	printf ("SKYEYE: xscale_cp13_cdp: ERROR isn't existed,");
+	SKYEYE_OUTREGS (stderr);
+	fprintf (stderr, "\n");
+	// skyeye_exit (-1);
+	return 0; //No matter return value, only for compiler.
+}
+
+unsigned
+xscale_cp13_read_reg (ARMul_State * state, unsigned reg, ARMword * data)
+{
+	printf ("SKYEYE: xscale_cp13_read_reg: ERROR isn't existed,");
+	SKYEYE_OUTREGS (stderr);
+	fprintf (stderr, "\n");
+	return 0;
+	//exit(-1);
+}
+
+unsigned
+xscale_cp13_write_reg (ARMul_State * state, unsigned reg, ARMword data)
+{
+	printf ("SKYEYE: xscale_cp13_write_reg: ERROR isn't existed,");
+	SKYEYE_OUTREGS (stderr);
+	fprintf (stderr, "\n");
+	// skyeye_exit (-1);
+	return 0; //No matter return value, only for compiler.
+}
+
+//------------------------------------------------------------------
+//xscale cp14 ----------------------------------------------------
+unsigned
+xscale_cp14_init (ARMul_State * state)
+{
+	//printf("SKYEYE: xscale_cp14_init: begin\n");
+	pxa_cp14_regs.cclkcfg = 0;
+	pxa_cp14_regs.pwrmode = 0;
+	return 0;
+}
+
+unsigned
+xscale_cp14_exit (ARMul_State * state)
+{
+	//printf("SKYEYE: xscale_cp14_exit: begin\n");
+	return 0;
+}
+
+unsigned
+xscale_cp14_ldc (ARMul_State * state, unsigned type, ARMword instr,
+		 ARMword data)
+{
+	printf ("SKYEYE: xscale_cp14_ldc: ERROR isn't existed, reg15 0x%x\n",
+		state->Reg[15]);
+	SKYEYE_OUTREGS (stderr);
+	// skyeye_exit (-1);
+	return 0; //No matter return value, only for compiler.
+}
+
+unsigned
+xscale_cp14_stc (ARMul_State * state, unsigned type, ARMword instr,
+		 ARMword * data)
+{
+	printf ("SKYEYE: xscale_cp14_stc: ERROR isn't existed, reg15 0x%x\n",
+		state->Reg[15]);
+	SKYEYE_OUTREGS (stderr);
+	// skyeye_exit (-1);
+	return 0; //No matter return value, only for compiler.
+}
+
+unsigned
+xscale_cp14_mrc (ARMul_State * state, unsigned type, ARMword instr,
+		 ARMword * data)
+{
+	unsigned opcode_2 = BITS (5, 7);
+	unsigned CRm = BITS (0, 3);
+	unsigned reg = BITS (16, 19);
+	unsigned result;
+
+	//printf("SKYEYE: xscale_cp14_mrc:opcode_2 0x%x, CRm 0x%x, reg 0x%x,reg[15] 0x%x, instr %x\n",opcode_2,CRm,reg,\
+	state->Reg[15], instr);
+
+	switch (reg) {
+	case CCLKCFG:		// cp 14 reg 6
+		//printf("cp14_mrc cclkcfg              \n");
+		*data = pxa_cp14_regs.cclkcfg;
+		break;
+	case PWRMODE:		// cp 14 reg 7
+		//printf("cp14_mrc pwrmode              \n");
+		*data = pxa_cp14_regs.pwrmode;
+		break;
+	default:
+		*data = 0;
+		printf ("SKYEYE:cp14_mrc unknown cp14 regs!!!!!!\n");
+		break;
+	}
+	return 0;
+}
+unsigned xscale_cp14_mcr (ARMul_State * state, unsigned type, ARMword instr,
+			  ARMword data)
+{
+	unsigned opcode_2 = BITS (5, 7);
+	unsigned CRm = BITS (0, 3);
+	unsigned reg = BITS (16, 19);
+	unsigned result;
+
+	//printf("SKYEYE: xscale_cp14_mcr:opcode_2 0x%x, CRm 0x%x, reg 0x%x,reg[15] 0x%x, instr %x\n",opcode_2,CRm,reg,\
+	  state->Reg[15], instr);
+
+	switch (reg) {
+	case CCLKCFG:		// cp 14 reg 6
+		//printf("cp14_mcr cclkcfg              \n");
+		pxa_cp14_regs.cclkcfg = data & 0xf;
+		break;
+		case PWRMODE:	// cp 14 reg 7
+			//printf("cp14_mcr pwrmode              \n");
+		pxa_cp14_regs.pwrmode = data & 0x3;
+		break;
+		default:printf ("SKYEYE: cp14_mcr unknown cp14 regs!!!!!!\n");
+		break;
+	}
+	return 0;
+}
+unsigned xscale_cp14_cdp (ARMul_State * state, unsigned type, ARMword instr)
+{
+	printf ("SKYEYE: xscale_cp14_cdp: ERROR isn't existed, reg15 0x%x\n",
+		state->Reg[15]);
+	SKYEYE_OUTREGS (stderr);
+	// skyeye_exit (-1);
+	return 0; //No matter return value, only for compiler.
+}
+unsigned xscale_cp14_read_reg (ARMul_State * state, unsigned reg,
+			       ARMword * data)
+{
+	printf ("SKYEYE: xscale_cp14_read_reg: ERROR isn't existed, reg15 0x%x\n", state->Reg[15]);
+	SKYEYE_OUTREGS (stderr);
+	// skyeye_exit (-1);
+	return 0; //No matter return value, only for compiler.
+}
+unsigned xscale_cp14_write_reg (ARMul_State * state, unsigned reg,
+				ARMword data)
+{
+	printf ("SKYEYE: xscale_cp14_write_reg: ERROR isn't existed, reg15 0x%x\n", state->Reg[15]);
+	SKYEYE_OUTREGS (stderr);
+	// skyeye_exit (-1);
+
+	return 0; //No matter return value, only for compiler.
+}
+
+//------------------------------------------------------------------
+//cp15 -------------------------------------
+unsigned xscale_cp15_ldc (ARMul_State * state, unsigned type, ARMword instr,
+			  ARMword data)
+{
+	printf ("SKYEYE: xscale_cp15_ldc: ERROR isn't existed\n");
+	SKYEYE_OUTREGS (stderr);
+	// skyeye_exit (-1);
+
+	return 0; //No matter return value, only for compiler.
+}
+unsigned xscale_cp15_stc (ARMul_State * state, unsigned type, ARMword instr,
+			  ARMword * data)
+{
+	printf ("SKYEYE: xscale_cp15_stc: ERROR isn't existed\n");
+	SKYEYE_OUTREGS (stderr);
+	// skyeye_exit (-1);
+
+	return 0; //No matter return value, only for compiler.
+}
+unsigned xscale_cp15_cdp (ARMul_State * state, unsigned type, ARMword instr)
+{
+	printf ("SKYEYE: xscale_cp15_cdp: ERROR isn't existed\n");
+	SKYEYE_OUTREGS (stderr);
+	// skyeye_exit (-1);
+
+	return 0; //No matter return value, only for compiler.
+}
+unsigned xscale_cp15_read_reg (ARMul_State * state, unsigned reg,
+			       ARMword * data)
+{
+//chy 2003-09-03: for xsacle_cp15_cp_access_allowed
+	if (reg == 15) {
+		*data = state->mmu.copro_access;
+		//printf("SKYEYE: xscale_cp15_read_reg: reg 0x%x,data %x\n",reg,*data);
+		return 0;
+	}
+	printf ("SKYEYE: xscale_cp15_read_reg: reg 0x%x, ERROR isn't existed\n", reg);
+	SKYEYE_OUTREGS (stderr);
+	// skyeye_exit (-1);
+
+	return 0; //No matter return value, only for compiler.
+}
+
+//chy 2003-09-03 used by macro CP_ACCESS_ALLOWED in armemu.h
+unsigned xscale_cp15_cp_access_allowed (ARMul_State * state, unsigned reg,
+					unsigned cpnum)
+{
+	unsigned data;
+
+	xscale_cp15_read_reg (state, reg, &data);
+	//printf("SKYEYE: cp15_cp_access_allowed data %x, cpnum %x, result %x\n", data, cpnum, (data & 1<<cpnum));
+	if (data & 1 << cpnum)
+		return 1;
+	else
+		return 0;
+}
+
+unsigned xscale_cp15_write_reg (ARMul_State * state, unsigned reg,
+				ARMword value)
+{
+	switch (reg) {
+	case MMU_FAULT_STATUS:
+		//printf("SKYEYE:cp15_write_reg  wrote FS        val 0x%x \n",value);
+		state->mmu.fault_status = value & 0x6FF;
+		break;
+	case MMU_FAULT_ADDRESS:
+		//printf("SKYEYE:cp15_write_reg wrote FA         val 0x%x \n",value);
+		state->mmu.fault_address = value;
+		break;
+	default:
+		printf ("SKYEYE: xscale_cp15_write_reg: reg 0x%x R15 %x ERROR isn't existed\n", reg, state->Reg[15]);
+		SKYEYE_OUTREGS (stderr);
+		// skyeye_exit (-1);
+	}
+	return 0;
+}
+
+unsigned
+xscale_cp15_init (ARMul_State * state)
+{
+	xscale_mmu_desc_t *desc;
+	cache_desc_t *c_desc;
+
+	state->mmu.control = 0;
+	state->mmu.translation_table_base = 0xDEADC0DE;
+	state->mmu.domain_access_control = 0xDEADC0DE;
+	state->mmu.fault_status = 0;
+	state->mmu.fault_address = 0;
+	state->mmu.process_id = 0;
+	state->mmu.cache_type = 0xB1AA1AA;	//0000 1011 0001 1010 1010 0001 1010 1010
+	state->mmu.aux_control = 0;
+
+	desc = &pxa_mmu_desc;
+
+	if (mmu_tlb_init (I_TLB (), desc->i_tlb)) {
+		ERROR_LOG(ARM11, "i_tlb init %d\n", -1);
+		goto i_tlb_init_error;
+	}
+
+	c_desc = &desc->i_cache;
+	if (mmu_cache_init (I_CACHE (), c_desc->width, c_desc->way,
+			    c_desc->set, c_desc->w_mode)) {
+		ERROR_LOG(ARM11, "i_cache init %d\n", -1);
+		goto i_cache_init_error;
+	}
+
+	if (mmu_tlb_init (D_TLB (), desc->d_tlb)) {
+		ERROR_LOG(ARM11, "d_tlb init %d\n", -1);
+		goto d_tlb_init_error;
+	}
+
+	c_desc = &desc->main_d_cache;
+	if (mmu_cache_init (MAIN_D_CACHE (), c_desc->width, c_desc->way,
+			    c_desc->set, c_desc->w_mode)) {
+		ERROR_LOG(ARM11, "main_d_cache init %d\n", -1);
+		goto main_d_cache_init_error;
+	}
+
+	c_desc = &desc->mini_d_cache;
+	if (mmu_cache_init (MINI_D_CACHE (), c_desc->width, c_desc->way,
+			    c_desc->set, c_desc->w_mode)) {
+		ERROR_LOG(ARM11, "mini_d_cache init %d\n", -1);
+		goto mini_d_cache_init_error;
+	}
+
+	if (mmu_wb_init (WB (), desc->wb.num, desc->wb.nb)) {
+		ERROR_LOG(ARM11, "wb init %d\n", -1);
+		goto wb_init_error;
+	}
+#if 0
+	if (mmu_rb_init (RB (), desc->rb)) {
+		ERROR_LOG(ARM11, "rb init %d\n", -1);
+		goto rb_init_error;
+	}
+#endif
+
+	return 0;
+#if 0
+      rb_init_error:
+	mmu_wb_exit (WB ());
+#endif
+      wb_init_error:
+	mmu_cache_exit (MINI_D_CACHE ());
+      mini_d_cache_init_error:
+	mmu_cache_exit (MAIN_D_CACHE ());
+      main_d_cache_init_error:
+	mmu_tlb_exit (D_TLB ());
+      d_tlb_init_error:
+	mmu_cache_exit (I_CACHE ());
+      i_cache_init_error:
+	mmu_tlb_exit (I_TLB ());
+      i_tlb_init_error:
+	return -1;
+}
+
+unsigned
+xscale_cp15_exit (ARMul_State * state)
+{
+	//mmu_rb_exit(RB());
+	mmu_wb_exit (WB ());
+	mmu_cache_exit (MINI_D_CACHE ());
+	mmu_cache_exit (MAIN_D_CACHE ());
+	mmu_tlb_exit (D_TLB ());
+	mmu_cache_exit (I_CACHE ());
+	mmu_tlb_exit (I_TLB ());
+	return 0;
+};
+
+
+static fault_t
+	xscale_mmu_load_instr (ARMul_State * state, ARMword va,
+			       ARMword * instr)
+{
+	fault_t fault;
+	tlb_entry_t *tlb;
+	cache_line_t *cache;
+	int c;			//cache bit
+	ARMword pa;		//physical addr
+
+	static int debug_count = 0;	//used for debug
+
+	DEBUG_LOG(ARM11, "va = %x\n", va);
+
+	va = mmu_pid_va_map (va);
+	if (MMU_Enabled) {
+		/*align check */
+		if ((va & (INSN_SIZE - 1)) && MMU_Aligned) {
+			DEBUG_LOG(ARM11, "align\n");
+			return ALIGNMENT_FAULT;
+		}
+		else
+			va &= ~(INSN_SIZE - 1);
+
+		/*translate tlb */
+		fault = translate (state, va, I_TLB (), &tlb);
+		if (fault) {
+			DEBUG_LOG(ARM11, "translate\n");
+			return fault;
+		}
+
+		/*check access */
+		fault = check_access (state, va, tlb, 1);
+		if (fault) {
+			DEBUG_LOG(ARM11, "check_fault\n");
+			return fault;
+		}
+	}
+	//chy 2003-09-02 for test, don't use cache  ?????       
+#if 0
+	/*search cache no matter MMU enabled/disabled */
+	cache = mmu_cache_search (state, I_CACHE (), va);
+	if (cache) {
+		*instr = cache->data[va_cache_index (va, I_CACHE ())];
+		return 0;
+	}
+#endif
+	/*if MMU disabled or C flag is set alloc cache */
+	if (MMU_Disabled) {
+		c = 1;
+		pa = va;
+	}
+	else {
+		c = tlb_c_flag (tlb);
+		pa = tlb_va_to_pa (tlb, va);
+	}
+
+	//chy 2003-09-03 only read mem, don't use cache now,will change later ????
+	//*instr = mem_read_word (state, pa);
+	bus_read(32, pa, instr);
+#if 0
+//-----------------------------------------------------------
+	//chy 2003-09-02 for test????
+	if (pa >= 0xa01c8000 && pa <= 0xa01c8020) {
+		printf ("SKYEYE:load_instr: pa %x, va %x,instr %x, R15 %x\n",
+			pa, va, *instr, state->Reg[15]);
+	}
+
+//----------------------------------------------------------------------        
+#endif
+	return NO_FAULT;
+
+	if (c) {
+		int index;
+
+		debug_count++;
+		cache = mmu_cache_alloc (state, I_CACHE (), va, pa);
+		index = va_cache_index (va, I_CACHE ());
+		*instr = cache->data[va_cache_index (va, I_CACHE ())];
+	}
+	else
+		//*instr = mem_read_word (state, pa);
+		bus_read(32, pa, instr);
+
+	return NO_FAULT;
+};
+
+
+
+static fault_t
+	xscale_mmu_read_byte (ARMul_State * state, ARMword virt_addr,
+			      ARMword * data)
+{
+	//ARMword temp,offset;
+	fault_t fault;
+	fault = xscale_mmu_read (state, virt_addr, data, ARM_BYTE_TYPE);
+	return fault;
+}
+
+static fault_t
+	xscale_mmu_read_halfword (ARMul_State * state, ARMword virt_addr,
+				  ARMword * data)
+{
+	//ARMword temp,offset;
+	fault_t fault;
+	fault = xscale_mmu_read (state, virt_addr, data, ARM_HALFWORD_TYPE);
+	return fault;
+}
+
+static fault_t
+	xscale_mmu_read_word (ARMul_State * state, ARMword virt_addr,
+			      ARMword * data)
+{
+	return xscale_mmu_read (state, virt_addr, data, ARM_WORD_TYPE);
+}
+
+
+
+
+static fault_t
+	xscale_mmu_read (ARMul_State * state, ARMword va, ARMword * data,
+			 ARMword datatype)
+{
+	fault_t fault;
+//      rb_entry_t *rb;
+	tlb_entry_t *tlb;
+	cache_line_t *cache;
+	ARMword pa, real_va, temp, offset;
+	//chy 2003-09-02 for test ????
+	static unsigned chyst1 = 0, chyst2 = 0;
+
+	DEBUG_LOG(ARM11, "va = %x\n", va);
+
+	va = mmu_pid_va_map (va);
+	real_va = va;
+	/*if MMU disabled, memory_read */
+	if (MMU_Disabled) {
+		//*data = mem_read_word(state, va);
+		if (datatype == ARM_BYTE_TYPE)
+			//*data = mem_read_byte (state, va);
+			bus_read(8, va, data);
+		else if (datatype == ARM_HALFWORD_TYPE)
+			//*data = mem_read_halfword (state, va);
+			bus_read(16, va, data);
+		else if (datatype == ARM_WORD_TYPE)
+			//*data = mem_read_word (state, va);
+			bus_read(32, va, data);
+		else {
+			printf ("SKYEYE:1 xscale_mmu_read error: unknown data type %d\n", datatype);
+			// skyeye_exit (-1);
+		}
+
+		return NO_FAULT;
+	}
+
+	/*align check */
+	if (((va & 3) && (datatype == ARM_WORD_TYPE) && MMU_Aligned) ||
+	    ((va & 1) && (datatype == ARM_HALFWORD_TYPE) && MMU_Aligned)) {
+		DEBUG_LOG(ARM11, "align\n");
+		return ALIGNMENT_FAULT;
+	}			// else
+
+	va &= ~(WORD_SIZE - 1);
+
+	/*translate va to tlb */
+	fault = translate (state, va, D_TLB (), &tlb);
+	if (fault) {
+		DEBUG_LOG(ARM11, "translate\n");
+		return fault;
+	}
+	/*check access permission */
+	fault = check_access (state, va, tlb, 1);
+	if (fault)
+		return fault;
+
+#if 0
+//------------------------------------------------
+//chy 2003-09-02 for test only ,should commit ????
+	if (datatype == ARM_WORD_TYPE) {
+		if (real_va >= 0xffff0000 && real_va <= 0xffff0020) {
+			pa = tlb_va_to_pa (tlb, va);
+			*data = mem_read_word (state, pa);
+			chyst1++;
+			printf ("**SKYEYE:mmu_read word %d: pa %x, va %x, data %x, R15 %x\n", chyst1, pa, real_va, *data, state->Reg[15]);
+			/*
+			   cache==mmu_cache_search(state,MAIN_D_CACHE(),va);
+			   if(cache){
+			   *data = cache->data[va_cache_index(va, MAIN_D_CACHE())];
+			   printf("cached data %x\n",*data);
+			   }else   printf("no cached data\n");
+			 */
+		}
+	}
+//-------------------------------------------------
+#endif
+#if 0
+	/*search in read buffer */
+	rb = mmu_rb_search (RB (), va);
+	if (rb) {
+		if (rb->fault)
+			return rb->fault;
+		*data = rb->data[(va & (rb_masks[rb->type] - 1)) >> WORD_SHT];
+		goto datatrans;
+		//return 0;
+	};
+#endif
+
+	/*2004-07-19 chy: add support of xscale MMU CacheDisabled option */
+	if (MMU_CacheDisabled) {
+		//if(1){ can be used to test cache error
+		/*get phy_addr */
+		pa = tlb_va_to_pa (tlb, real_va);
+		if (datatype == ARM_BYTE_TYPE)
+			//*data = mem_read_byte (state, pa);
+			bus_read(8, pa, data);
+		else if (datatype == ARM_HALFWORD_TYPE)
+			//*data = mem_read_halfword (state, pa);
+			bus_read(16, pa, data);
+		else if (datatype == ARM_WORD_TYPE)
+			//*data = mem_read_word (state, pa);
+			bus_read(32, pa, data);
+		else {
+			printf ("SKYEYE:MMU_CacheDisabled xscale_mmu_read error: unknown data type %d\n", datatype);
+			// skyeye_exit (-1);
+		}
+		return NO_FAULT;
+	}
+
+
+	/*search main cache */
+	cache = mmu_cache_search (state, MAIN_D_CACHE (), va);
+	if (cache) {
+		*data = cache->data[va_cache_index (va, MAIN_D_CACHE ())];
+#if 0
+//------------------------------------------------------------------------
+//chy 2003-09-02 for test only ,should commit ????
+		if (real_va >= 0xffff0000 && real_va <= 0xffff0020) {
+			pa = tlb_va_to_pa (tlb, va);
+			chyst2++;
+			printf ("**SKYEYE:mmu_read wordk:cache %d: pa %x, va %x, data %x, R15 %x\n", chyst2, pa, real_va, *data, state->Reg[15]);
+		}
+//-------------------------------------------------------------------
+#endif
+		goto datatrans;
+		//return 0;
+	}
+	//chy 2003-08-24, now maybe we don't need minidcache  ????
+#if 0
+	/*search mini cache */
+	cache = mmu_cache_search (state, MINI_D_CACHE (), va);
+	if (cache) {
+		*data = cache->data[va_cache_index (va, MINI_D_CACHE ())];
+		goto datatrans;
+		//return 0;
+	}
+#endif
+	/*get phy_addr */
+	pa = tlb_va_to_pa (tlb, va);
+	//chy 2003-08-24 , in xscale it means what ?????
+#if 0
+	if ((pa >= 0xe0000000) && (pa < 0xe8000000)) {
+
+		if (tlb_c_flag (tlb)) {
+			if (tlb_b_flag (tlb)) {
+				mmu_cache_soft_flush (state, MAIN_D_CACHE (),
+						      pa);
+			}
+			else {
+				mmu_cache_soft_flush (state, MINI_D_CACHE (),
+						      pa);
+			}
+		}
+		return 0;
+	}
+#endif
+	//chy 2003-08-24, check phy addr
+	//ywc 2004-11-30, inactive this check because of using 0xc0000000 as the framebuffer start address
+	/*
+	   if(pa >= 0xb0000000){
+	   printf("SKYEYE:xscale_mmu_read: phy address 0x%x error,reg[15] 0x%x\n",pa,state->Reg[15]);
+	   return 0;
+	   }
+	 */
+
+	//chy 2003-08-24, now maybe we don't need wb  ????
+#if 0
+	/*if Buffer, drain Write Buffer first */
+	if (tlb_b_flag (tlb))
+		mmu_wb_drain_all (state, WB ());
+#endif
+	/*alloc cache or mem_read */
+	if (tlb_c_flag (tlb) && MMU_DCacheEnabled) {
+		cache_s *cache_t;
+
+		if (tlb_b_flag (tlb))
+			cache_t = MAIN_D_CACHE ();
+		else
+			cache_t = MINI_D_CACHE ();
+		cache = mmu_cache_alloc (state, cache_t, va, pa);
+		*data = cache->data[va_cache_index (va, cache_t)];
+	}
+	else {
+		//*data = mem_read_word(state, pa);
+		if (datatype == ARM_BYTE_TYPE)
+			//*data = mem_read_byte (state, pa | (real_va & 3));
+			bus_read(8, pa | (real_va & 3), data);
+		else if (datatype == ARM_HALFWORD_TYPE)
+			//*data = mem_read_halfword (state, pa | (real_va & 2));
+			bus_read(16, pa | (real_va & 2), data);
+		else if (datatype == ARM_WORD_TYPE)
+			//*data = mem_read_word (state, pa);
+			bus_read(32, pa, data);
+		else {
+			printf ("SKYEYE:2 xscale_mmu_read error: unknown data type %d\n", datatype);
+			// skyeye_exit (-1);
+		}
+		return NO_FAULT;
+	}
+
+
+      datatrans:
+	if (datatype == ARM_HALFWORD_TYPE) {
+		temp = *data;
+		offset = (((ARMword) state->bigendSig * 2) ^ (real_va & 2)) << 3;	/* bit offset into the word */
+		*data = (temp >> offset) & 0xffff;
+	}
+	else if (datatype == ARM_BYTE_TYPE) {
+		temp = *data;
+		offset = (((ARMword) state->bigendSig * 3) ^ (real_va & 3)) << 3;	/* bit offset into the word */
+		*data = (temp >> offset & 0xffL);
+	}
+      end:
+	return NO_FAULT;
+}
+
+
+static fault_t
+	xscale_mmu_write_byte (ARMul_State * state, ARMword virt_addr,
+			       ARMword data)
+{
+	return xscale_mmu_write (state, virt_addr, data, ARM_BYTE_TYPE);
+}
+
+static fault_t
+	xscale_mmu_write_halfword (ARMul_State * state, ARMword virt_addr,
+				   ARMword data)
+{
+	return xscale_mmu_write (state, virt_addr, data, ARM_HALFWORD_TYPE);
+}
+
+static fault_t
+	xscale_mmu_write_word (ARMul_State * state, ARMword virt_addr,
+			       ARMword data)
+{
+	return xscale_mmu_write (state, virt_addr, data, ARM_WORD_TYPE);
+}
+
+
+
+static fault_t
+	xscale_mmu_write (ARMul_State * state, ARMword va, ARMword data,
+			  ARMword datatype)
+{
+	tlb_entry_t *tlb;
+	cache_line_t *cache;
+	cache_s *cache_t;
+	int b;
+	ARMword pa, real_va, temp, offset;
+	fault_t fault;
+
+	ARMword index;
+//chy 2003-09-02 for test ????
+//      static unsigned chyst1=0,chyst2=0;
+
+	DEBUG_LOG(ARM11, "va = %x, val = %x\n", va, data);
+	va = mmu_pid_va_map (va);
+	real_va = va;
+
+	if (MMU_Disabled) {
+		//mem_write_word(state, va, data);
+		if (datatype == ARM_BYTE_TYPE)
+			//mem_write_byte (state, va, data);
+			bus_write(8, va, data);
+		else if (datatype == ARM_HALFWORD_TYPE)
+			//mem_write_halfword (state, va, data);
+			bus_write(16, va, data);
+		else if (datatype == ARM_WORD_TYPE)
+			//mem_write_word (state, va, data);
+			bus_write(32, va, data);
+		else {
+			printf ("SKYEYE:1 xscale_mmu_write error: unknown data type %d\n", datatype);
+			// skyeye_exit (-1);
+		}
+
+		return NO_FAULT;
+	}
+	/*align check */
+	if (((va & 3) && (datatype == ARM_WORD_TYPE) && MMU_Aligned) ||
+	    ((va & 1) && (datatype == ARM_HALFWORD_TYPE) && MMU_Aligned)) {
+		DEBUG_LOG(ARM11, "align\n");
+		return ALIGNMENT_FAULT;
+	}			//else
+	va &= ~(WORD_SIZE - 1);
+	/*tlb translate */
+	fault = translate (state, va, D_TLB (), &tlb);
+	if (fault) {
+		DEBUG_LOG(ARM11, "translate\n");
+		return fault;
+	}
+	/*tlb check access */
+	fault = check_access (state, va, tlb, 0);
+	if (fault) {
+		DEBUG_LOG(ARM11, "check_access\n");
+		return fault;
+	}
+
+	/*2004-07-19 chy: add support for xscale MMU_CacheDisabled */
+	if (MMU_CacheDisabled) {
+		//if(1){ can be used to test the cache error
+		/*get phy_addr */
+		pa = tlb_va_to_pa (tlb, real_va);
+		if (datatype == ARM_BYTE_TYPE)
+			//mem_write_byte (state, pa, data);
+			bus_write(8, pa, data);
+		else if (datatype == ARM_HALFWORD_TYPE)
+			//mem_write_halfword (state, pa, data);
+			bus_write(16, pa, data);
+		else if (datatype == ARM_WORD_TYPE)
+			//mem_write_word (state, pa, data);
+			bus_write(32, pa , data);
+		else {
+			printf ("SKYEYE:MMU_CacheDisabled xscale_mmu_write error: unknown data type %d\n", datatype);
+			// skyeye_exit (-1);
+		}
+
+		return NO_FAULT;
+	}
+
+	/*search main cache */
+	b = tlb_b_flag (tlb);
+	pa = tlb_va_to_pa (tlb, va);
+	cache = mmu_cache_search (state, MAIN_D_CACHE (), va);
+	if (cache) {
+		cache_t = MAIN_D_CACHE ();
+		goto has_cache;
+	}
+	//chy 2003-08-24, now maybe we don't need minidcache  ????
+#if 0
+	/*search mini cache */
+	cache = mmu_cache_search (state, MINI_D_CACHE (), va);
+	if (cache) {
+		cache_t = MINI_D_CACHE ();
+		goto has_cache;
+	}
+#endif
+	b = tlb_b_flag (tlb);
+	pa = tlb_va_to_pa (tlb, va);
+	//chy 2003-08-24, check phy addr 0xa0000000, size 0x04000000
+	//ywc 2004-11-30, inactive this check because of using 0xc0000000 as the framebuffer start address
+	/*
+	   if(pa >= 0xb0000000){
+	   printf("SKYEYE:xscale_mmu_write phy address 0x%x error,reg[15] 0x%x\n",pa,state->Reg[15]);
+	   return 0;
+	   }
+	 */
+
+	//chy 2003-08-24, now maybe we don't need WB  ????
+#if 0
+	if (b) {
+		if (MMU_WBEnabled) {
+			if (datatype == ARM_WORD_TYPE)
+				mmu_wb_write_bytes (state, WB (), pa, &data,
+						    4);
+			else if (datatype == ARM_HALFWORD_TYPE)
+				mmu_wb_write_bytes (state, WB (),
+						    (pa | (real_va & 2)),
+						    &data, 2);
+			else if (datatype == ARM_BYTE_TYPE)
+				mmu_wb_write_bytes (state, WB (),
+						    (pa | (real_va & 3)),
+						    &data, 1);
+
+		}
+		else {
+			if (datatype == ARM_WORD_TYPE)
+				mem_write_word (state, pa, data);
+			else if (datatype == ARM_HALFWORD_TYPE)
+				mem_write_halfword (state,
+						    (pa | (real_va & 2)),
+						    data);
+			else if (datatype == ARM_BYTE_TYPE)
+				mem_write_byte (state, (pa | (real_va & 3)),
+						data);
+		}
+	}
+	else {
+
+		mmu_wb_drain_all (state, WB ());
+
+		if (datatype == ARM_WORD_TYPE)
+			mem_write_word (state, pa, data);
+		else if (datatype == ARM_HALFWORD_TYPE)
+			mem_write_halfword (state, (pa | (real_va & 2)),
+					    data);
+		else if (datatype == ARM_BYTE_TYPE)
+			mem_write_byte (state, (pa | (real_va & 3)), data);
+	}
+#endif
+	//chy 2003-08-24, just write phy addr
+	if (datatype == ARM_WORD_TYPE)
+		//mem_write_word (state, pa, data);
+		bus_write(32, pa, data);
+	else if (datatype == ARM_HALFWORD_TYPE)
+		//mem_write_halfword (state, (pa | (real_va & 2)), data);
+		bus_write(16, pa | (real_va & 2), data);
+	else if (datatype == ARM_BYTE_TYPE)
+		//mem_write_byte (state, (pa | (real_va & 3)), data);
+		bus_write(8, (pa | (real_va & 3)), data);
+#if 0
+//-------------------------------------------------------------
+//chy 2003-09-02 for test ????
+	if (datatype == ARM_WORD_TYPE) {
+		if (real_va >= 0xffff0000 && real_va <= 0xffff0020) {
+			printf ("**SKYEYE:mmu_write word: pa %x, va %x, data %x, R15 %x \n", pa, real_va, data, state->Reg[15]);
+		}
+	}
+//--------------------------------------------------------------
+#endif
+	return NO_FAULT;
+
+      has_cache:
+	index = va_cache_index (va, cache_t);
+	//cache->data[index] = data;
+
+	if (datatype == ARM_WORD_TYPE)
+		cache->data[index] = data;
+	else if (datatype == ARM_HALFWORD_TYPE) {
+		temp = cache->data[index];
+		offset = (((ARMword) state->bigendSig * 2) ^ (real_va & 2)) << 3;	/* bit offset into the word */
+		cache->data[index] =
+			(temp & ~(0xffffL << offset)) | ((data & 0xffffL) <<
+							 offset);
+	}
+	else if (datatype == ARM_BYTE_TYPE) {
+		temp = cache->data[index];
+		offset = (((ARMword) state->bigendSig * 3) ^ (real_va & 3)) << 3;	/* bit offset into the word */
+		cache->data[index] =
+			(temp & ~(0xffL << offset)) | ((data & 0xffL) <<
+						       offset);
+	}
+
+	if (index < (cache_t->width >> (WORD_SHT + 1)))
+		cache->tag |= TAG_FIRST_HALF_DIRTY;
+	else
+		cache->tag |= TAG_LAST_HALF_DIRTY;
+//-------------------------------------------------------------
+//chy 2003-09-03 be sure the changed value will be in memory as soon as possible, so I cache can get the newest value
+#if 0
+	{
+		if (datatype == ARM_WORD_TYPE)
+			mem_write_word (state, pa, data);
+		else if (datatype == ARM_HALFWORD_TYPE)
+			mem_write_halfword (state, (pa | (real_va & 2)),
+					    data);
+		else if (datatype == ARM_BYTE_TYPE)
+			mem_write_byte (state, (pa | (real_va & 3)), data);
+	}
+#endif
+#if 0
+//chy 2003-09-02 for test ????
+	if (datatype == ARM_WORD_TYPE) {
+		if (real_va >= 0xffff0000 && real_va <= 0xffff0020) {
+			printf ("**SKYEYE:mmu_write word:cache: pa %x, va %x, data %x, R15 %x\n", pa, real_va, data, state->Reg[15]);
+		}
+	}
+//-------------------------------------------------------------
+#endif
+	if (datatype == ARM_WORD_TYPE)
+		//mem_write_word (state, pa, data);
+		bus_write(32, pa, data);
+	else if (datatype == ARM_HALFWORD_TYPE)
+		//mem_write_halfword (state, (pa | (real_va & 2)), data);
+		bus_write(16, pa | (real_va & 2), data);
+	else if (datatype == ARM_BYTE_TYPE)
+		//mem_write_byte (state, (pa | (real_va & 3)), data);
+		bus_write(8, (pa | (real_va & 3)), data);
+	return NO_FAULT;
+}
+
+ARMword xscale_cp15_mrc (ARMul_State * state,
+			 unsigned type, ARMword instr, ARMword * value)
+{
+	return xscale_mmu_mrc (state, instr, value);
+}
+
+ARMword xscale_mmu_mrc (ARMul_State * state, ARMword instr, ARMword * value)
+{
+	ARMword data;
+	unsigned opcode_2 = BITS (5, 7);
+	unsigned CRm = BITS (0, 3);
+	unsigned reg = BITS (16, 19);
+	unsigned result;
+	mmu_regnum_t creg = (mmu_regnum_t)reg;
+
+/*
+ printf("SKYEYE: xscale_cp15_mrc:opcode_2 0x%x, CRm 0x%x, reg 0x%x,reg[15] 0x%x, instr %x\n",opcode_2,CRm,reg,\
+	state->Reg[15], instr);
+*/
+	switch (creg) {
+	case MMU_ID:		//XSCALE_CP15
+		//printf("mmu_mrc read ID       \n");
+		data = (opcode_2 ? state->mmu.cache_type : state->cpu->
+			cpu_val);
+		break;
+	case MMU_CONTROL:	//XSCALE_CP15_AUX_CONTROL
+		//printf("mmu_mrc read CONTROL  \n");
+		data = (opcode_2 ? state->mmu.aux_control : state->mmu.
+			control);
+		break;
+	case MMU_TRANSLATION_TABLE_BASE:
+		//printf("mmu_mrc read TTB      \n");
+		data = state->mmu.translation_table_base;
+		break;
+	case MMU_DOMAIN_ACCESS_CONTROL:
+		//printf("mmu_mrc read DACR     \n");
+		data = state->mmu.domain_access_control;
+		break;
+	case MMU_FAULT_STATUS:
+		//printf("mmu_mrc read FSR      \n");
+		data = state->mmu.fault_status;
+		break;
+	case MMU_FAULT_ADDRESS:
+		//printf("mmu_mrc read FAR      \n");
+		data = state->mmu.fault_address;
+		break;
+	case MMU_PID:
+		//printf("mmu_mrc read PID      \n");
+		data = state->mmu.process_id;
+	case XSCALE_CP15_COPRO_ACCESS:
+		//printf("xscale cp15 read coprocessor access\n");
+		data = state->mmu.copro_access;
+		break;
+	default:
+		data = 0;
+		printf ("SKYEYE: xscale_cp15_mrc read UNKNOWN - reg %d, pc 0x%x\n", creg, state->Reg[15]);
+		// skyeye_exit (-1);
+		break;
+	}
+	*value = data;
+	//printf("SKYEYE: xscale_cp15_mrc:end value  0x%x\n",data);
+	return ARMul_DONE;
+}
+
+void xscale_cp15_cache_ops (ARMul_State * state, ARMword instr, ARMword value)
+{
+//chy: 2003-08-24 now, the BTB isn't simualted ....????
+
+	unsigned CRm, OPC_2;
+
+	CRm = BITS (0, 3);
+	OPC_2 = BITS (5, 7);
+	//err_msg("SKYEYE: xscale cp15_cache_ops:OPC_2 = 0x%x CRm = 0x%x, Reg15 0x%x\n", OPC_2, CRm,state->Reg[15]);
+
+	if (OPC_2 == 0 && CRm == 7) {
+		mmu_cache_invalidate_all (state, I_CACHE ());
+		mmu_cache_invalidate_all (state, MAIN_D_CACHE ());
+		return;
+	}
+
+	if (OPC_2 == 0 && CRm == 5) {
+		mmu_cache_invalidate_all (state, I_CACHE ());
+		return;
+	}
+	if (OPC_2 == 1 && CRm == 5) {
+		mmu_cache_invalidate (state, I_CACHE (), value);
+		return;
+	}
+
+	if (OPC_2 == 0 && CRm == 6) {
+		mmu_cache_invalidate_all (state, MAIN_D_CACHE ());
+		return;
+	}
+
+	if (OPC_2 == 1 && CRm == 6) {
+		mmu_cache_invalidate (state, MAIN_D_CACHE (), value);
+		return;
+	}
+
+	if (OPC_2 == 1 && CRm == 0xa) {
+		mmu_cache_clean (state, MAIN_D_CACHE (), value);
+		return;
+	}
+
+	if (OPC_2 == 4 && CRm == 0xa) {
+		mmu_wb_drain_all (state, WB ());
+		return;
+	}
+
+	if (OPC_2 == 6 && CRm == 5) {
+		//chy 2004-07-19 shoud fix in the future????!!!!
+		//printf("SKYEYE: xscale_cp15_cache_ops:invalidate BTB CANT!!!!!!!!!!\n"); 
+		//exit(-1);
+		return;
+	}
+
+	if (OPC_2 == 5 && CRm == 2) {
+		//printf("SKYEYE: cp15_c_o: A L in D C, value %x, reg15 %x\n",value, state->Reg[15]); 
+		//exit(-1);
+		//chy 2003-09-01 for test
+		mmu_cache_invalidate_all (state, MAIN_D_CACHE ());
+		return;
+	}
+
+	ERROR_LOG(ARM11, "SKYEYE: xscale cp15_cache_ops:Unknown OPC_2 = 0x%x CRm = 0x%x, Reg15 0x%x\n", OPC_2, CRm, state->Reg[15]);
+	// skyeye_exit (-1);
+}
+
+static void
+	xscale_cp15_tlb_ops (ARMul_State * state, ARMword instr,
+			     ARMword value)
+{
+	int CRm, OPC_2;
+
+	CRm = BITS (0, 3);
+	OPC_2 = BITS (5, 7);
+
+
+	//err_msg("SKYEYE:xscale_cp15_tlb_ops:OPC_2 = 0x%x CRm = 0x%x,Reg[15] 0x%x\n", OPC_2, CRm,state->Reg[15]);
+	if (OPC_2 == 0 && CRm == 0x7) {
+		mmu_tlb_invalidate_all (state, I_TLB ());
+		mmu_tlb_invalidate_all (state, D_TLB ());
+		return;
+	}
+
+	if (OPC_2 == 0 && CRm == 0x5) {
+		mmu_tlb_invalidate_all (state, I_TLB ());
+		return;
+	}
+
+	if (OPC_2 == 1 && CRm == 0x5) {
+		mmu_tlb_invalidate_entry (state, I_TLB (), value);
+		return;
+	}
+
+	if (OPC_2 == 0 && CRm == 0x6) {
+		mmu_tlb_invalidate_all (state, D_TLB ());
+		return;
+	}
+
+	if (OPC_2 == 1 && CRm == 0x6) {
+		mmu_tlb_invalidate_entry (state, D_TLB (), value);
+		return;
+	}
+
+	ERROR_LOG(ARM11, "SKYEYE:xscale_cp15_tlb_ops:Unknow OPC_2 = 0x%x CRm = 0x%x,Reg[15] 0x%x\n", OPC_2, CRm, state->Reg[15]);
+	// skyeye_exit (-1);
+}
+
+
+ARMword xscale_cp15_mcr (ARMul_State * state,
+			 unsigned type, ARMword instr, ARMword value)
+{
+	return xscale_mmu_mcr (state, instr, value);
+}
+
+ARMword xscale_mmu_mcr (ARMul_State * state, ARMword instr, ARMword value)
+{
+	ARMword data;
+	unsigned opcode_2 = BITS (5, 7);
+	unsigned CRm = BITS (0, 3);
+	unsigned reg = BITS (16, 19);
+	unsigned result;
+	mmu_regnum_t creg = (mmu_regnum_t)reg;
+
+	//printf("SKYEYE: xscale_cp15_mcr: opcode_2 0x%x, CRm 0x%x, reg ox%x, value 0x%x, reg[15] 0x%x, instr 0x%x\n",opcode_2,CRm,reg, value, state->Reg[15], instr);
+
+	switch (creg) {
+	case MMU_CONTROL:
+		//printf("mmu_mcr wrote CONTROL  val 0x%x       \n",value);
+		state->mmu.control =
+			(opcode_2 ? (value & 0x33) : (value & 0x3FFF));
+		break;
+	case MMU_TRANSLATION_TABLE_BASE:
+		//printf("mmu_mcr wrote TTB      val 0x%x       \n",value);
+		state->mmu.translation_table_base = value & 0xFFFFC000;
+		break;
+	case MMU_DOMAIN_ACCESS_CONTROL:
+		//printf("mmu_mcr wrote DACR    val 0x%x \n",value);
+		state->mmu.domain_access_control = value;
+		break;
+
+	case MMU_FAULT_STATUS:
+		//printf("mmu_mcr wrote FS        val 0x%x \n",value);
+		state->mmu.fault_status = value & 0x6FF;
+		break;
+	case MMU_FAULT_ADDRESS:
+		//printf("mmu_mcr wrote FA         val 0x%x \n",value);
+		state->mmu.fault_address = value;
+		break;
+
+	case MMU_CACHE_OPS:
+//              printf("mmu_mcr wrote CO         val 0x%x \n",value);
+		xscale_cp15_cache_ops (state, instr, value);
+		break;
+	case MMU_TLB_OPS:
+		//printf("mmu_mcr wrote TO          val 0x%x \n",value);
+		xscale_cp15_tlb_ops (state, instr, value);
+		break;
+	case MMU_PID:
+		//printf("mmu_mcr wrote PID          val 0x%x \n",value);
+		state->mmu.process_id = value & 0xfe000000;
+		break;
+	case XSCALE_CP15_COPRO_ACCESS:
+		//printf("xscale cp15 write coprocessor access  val 0x %x\n",value);
+		state->mmu.copro_access = value & 0x3ff;
+		break;
+
+	default:
+		printf ("SKYEYE: xscale_cp15_mcr wrote UNKNOWN - reg %d, reg15 0x%x\n", creg, state->Reg[15]);
+		break;
+	}
+	//printf("SKYEYE: xscale_cp15_mcr wrote val 0x%x\n", value);
+	return 0;
+}
+
+//teawater add for arm2x86 2005.06.24-------------------------------------------
+static int xscale_mmu_v2p_dbct (ARMul_State * state, ARMword virt_addr,
+				ARMword * phys_addr)
+{
+	fault_t fault;
+	tlb_entry_t *tlb;
+
+	virt_addr = mmu_pid_va_map (virt_addr);
+	if (MMU_Enabled) {
+
+		/*align check */
+		if ((virt_addr & (WORD_SIZE - 1)) && MMU_Aligned) {
+			DEBUG_LOG(ARM11, "align\n");
+			return ALIGNMENT_FAULT;
+		}
+		else
+			virt_addr &= ~(WORD_SIZE - 1);
+
+		/*translate tlb */
+		fault = translate (state, virt_addr, I_TLB (), &tlb);
+		if (fault) {
+			DEBUG_LOG(ARM11, "translate\n");
+			return fault;
+		}
+
+		/*check access */
+		fault = check_access (state, virt_addr, tlb, 1);
+		if (fault) {
+			DEBUG_LOG(ARM11, "check_fault\n");
+			return fault;
+		}
+	}
+
+	if (MMU_Disabled) {
+		*phys_addr = virt_addr;
+	}
+	else {
+		*phys_addr = tlb_va_to_pa (tlb, virt_addr);
+	}
+
+	return (0);
+}
+
+//AJ2D--------------------------------------------------------------------------
+
+/*xscale mmu_ops_t*/
+//mmu_ops_t xscale_mmu_ops = {
+//	xscale_cp15_init,
+//		xscale_cp15_exit,
+//		xscale_mmu_read_byte,
+//		xscale_mmu_write_byte,
+//		xscale_mmu_read_halfword,
+//		xscale_mmu_write_halfword,
+//		xscale_mmu_read_word,
+//		xscale_mmu_write_word,
+//		xscale_mmu_load_instr, xscale_mmu_mcr, xscale_mmu_mrc,
+////teawater add for arm2x86 2005.06.24-------------------------------------------
+//		xscale_mmu_v2p_dbct,
+////AJ2D--------------------------------------------------------------------------
+//};
diff --git a/src/core/arm/interpreter/vfp/asm_vfp.h b/src/core/arm/interpreter/vfp/asm_vfp.h
new file mode 100644
index 000000000..f4ab34fd4
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/asm_vfp.h
@@ -0,0 +1,84 @@
+/*
+ * arch/arm/include/asm/vfp.h
+ *
+ * VFP register definitions.
+ * First, the standard VFP set.
+ */
+
+#define FPSID			cr0
+#define FPSCR			cr1
+#define MVFR1			cr6
+#define MVFR0			cr7
+#define FPEXC			cr8
+#define FPINST			cr9
+#define FPINST2			cr10
+
+/* FPSID bits */
+#define FPSID_IMPLEMENTER_BIT	(24)
+#define FPSID_IMPLEMENTER_MASK	(0xff << FPSID_IMPLEMENTER_BIT)
+#define FPSID_SOFTWARE		(1<<23)
+#define FPSID_FORMAT_BIT	(21)
+#define FPSID_FORMAT_MASK	(0x3  << FPSID_FORMAT_BIT)
+#define FPSID_NODOUBLE		(1<<20)
+#define FPSID_ARCH_BIT		(16)
+#define FPSID_ARCH_MASK		(0xF  << FPSID_ARCH_BIT)
+#define FPSID_PART_BIT		(8)
+#define FPSID_PART_MASK		(0xFF << FPSID_PART_BIT)
+#define FPSID_VARIANT_BIT	(4)
+#define FPSID_VARIANT_MASK	(0xF  << FPSID_VARIANT_BIT)
+#define FPSID_REV_BIT		(0)
+#define FPSID_REV_MASK		(0xF  << FPSID_REV_BIT)
+
+/* FPEXC bits */
+#define FPEXC_EX		(1 << 31)
+#define FPEXC_EN		(1 << 30)
+#define FPEXC_DEX		(1 << 29)
+#define FPEXC_FP2V		(1 << 28)
+#define FPEXC_VV		(1 << 27)
+#define FPEXC_TFV		(1 << 26)
+#define FPEXC_LENGTH_BIT	(8)
+#define FPEXC_LENGTH_MASK	(7 << FPEXC_LENGTH_BIT)
+#define FPEXC_IDF		(1 << 7)
+#define FPEXC_IXF		(1 << 4)
+#define FPEXC_UFF		(1 << 3)
+#define FPEXC_OFF		(1 << 2)
+#define FPEXC_DZF		(1 << 1)
+#define FPEXC_IOF		(1 << 0)
+#define FPEXC_TRAP_MASK		(FPEXC_IDF|FPEXC_IXF|FPEXC_UFF|FPEXC_OFF|FPEXC_DZF|FPEXC_IOF)
+
+/* FPSCR bits */
+#define FPSCR_DEFAULT_NAN	(1<<25)
+#define FPSCR_FLUSHTOZERO	(1<<24)
+#define FPSCR_ROUND_NEAREST	(0<<22)
+#define FPSCR_ROUND_PLUSINF	(1<<22)
+#define FPSCR_ROUND_MINUSINF	(2<<22)
+#define FPSCR_ROUND_TOZERO	(3<<22)
+#define FPSCR_RMODE_BIT		(22)
+#define FPSCR_RMODE_MASK	(3 << FPSCR_RMODE_BIT)
+#define FPSCR_STRIDE_BIT	(20)
+#define FPSCR_STRIDE_MASK	(3 << FPSCR_STRIDE_BIT)
+#define FPSCR_LENGTH_BIT	(16)
+#define FPSCR_LENGTH_MASK	(7 << FPSCR_LENGTH_BIT)
+#define FPSCR_IOE		(1<<8)
+#define FPSCR_DZE		(1<<9)
+#define FPSCR_OFE		(1<<10)
+#define FPSCR_UFE		(1<<11)
+#define FPSCR_IXE		(1<<12)
+#define FPSCR_IDE		(1<<15)
+#define FPSCR_IOC		(1<<0)
+#define FPSCR_DZC		(1<<1)
+#define FPSCR_OFC		(1<<2)
+#define FPSCR_UFC		(1<<3)
+#define FPSCR_IXC		(1<<4)
+#define FPSCR_IDC		(1<<7)
+
+/* MVFR0 bits */
+#define MVFR0_A_SIMD_BIT	(0)
+#define MVFR0_A_SIMD_MASK	(0xf << MVFR0_A_SIMD_BIT)
+
+/* Bit patterns for decoding the packaged operation descriptors */
+#define VFPOPDESC_LENGTH_BIT	(9)
+#define VFPOPDESC_LENGTH_MASK	(0x07 << VFPOPDESC_LENGTH_BIT)
+#define VFPOPDESC_UNUSED_BIT	(24)
+#define VFPOPDESC_UNUSED_MASK	(0xFF << VFPOPDESC_UNUSED_BIT)
+#define VFPOPDESC_OPDESC_MASK	(~(VFPOPDESC_LENGTH_MASK | VFPOPDESC_UNUSED_MASK))
diff --git a/src/core/arm/interpreter/vfp/vfp.cpp b/src/core/arm/interpreter/vfp/vfp.cpp
new file mode 100644
index 000000000..eea5e24a9
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfp.cpp
@@ -0,0 +1,357 @@
+/*
+    armvfp.c - ARM VFPv3 emulation unit
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/* Note: this file handles interface with arm core and vfp registers */
+
+/* Opens debug for classic interpreter only */
+//#define DEBUG
+
+#include "common/common.h"
+
+#include "core/arm/interpreter/armdefs.h"
+#include "core/arm/interpreter/vfp/vfp.h"
+
+//ARMul_State* persistent_state; /* function calls from SoftFloat lib don't have an access to ARMul_state. */
+
+unsigned
+VFPInit (ARMul_State *state)
+{
+	state->VFP[VFP_OFFSET(VFP_FPSID)] = VFP_FPSID_IMPLMEN<<24 | VFP_FPSID_SW<<23 | VFP_FPSID_SUBARCH<<16 | 
+		VFP_FPSID_PARTNUM<<8 | VFP_FPSID_VARIANT<<4 | VFP_FPSID_REVISION;
+	state->VFP[VFP_OFFSET(VFP_FPEXC)] = 0;
+	state->VFP[VFP_OFFSET(VFP_FPSCR)] = 0;
+	
+	//persistent_state = state;
+	/* Reset only specify VFP_FPEXC_EN = '0' */
+
+	return No_exp;
+}
+
+unsigned
+VFPMRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value)
+{
+	/* MRC<c> <coproc>,<opc1>,<Rt>,<CRn>,<CRm>{,<opc2>} */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int OPC_1 = BITS (21, 23);
+	int Rt = BITS (12, 15);
+	int CRn = BITS (16, 19);
+	int CRm = BITS (0, 3);
+	int OPC_2 = BITS (5, 7);
+	
+	/* TODO check access permission */
+	
+	/* CRn/opc1 CRm/opc2 */
+	
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#define VFP_MRC_TRANS
+		#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+		#undef VFP_MRC_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n", 
+	       instr, CoProc, OPC_1, Rt, CRn, CRm, OPC_2);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPMCR (ARMul_State * state, unsigned type, ARMword instr, ARMword value)
+{
+	/* MCR<c> <coproc>,<opc1>,<Rt>,<CRn>,<CRm>{,<opc2>} */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int OPC_1 = BITS (21, 23);
+	int Rt = BITS (12, 15);
+	int CRn = BITS (16, 19);
+	int CRm = BITS (0, 3);
+	int OPC_2 = BITS (5, 7);
+	
+	/* TODO check access permission */
+	
+	/* CRn/opc1 CRm/opc2 */
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#define VFP_MCR_TRANS
+		#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+		#undef VFP_MCR_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n", 
+	       instr, CoProc, OPC_1, Rt, CRn, CRm, OPC_2);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPMRRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value1, ARMword * value2)
+{
+	/* MCRR<c> <coproc>,<opc1>,<Rt>,<Rt2>,<CRm> */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int OPC_1 = BITS (4, 7);
+	int Rt = BITS (12, 15);
+	int Rt2 = BITS (16, 19);
+	int CRm = BITS (0, 3);
+	
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#define VFP_MRRC_TRANS
+		#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+		#undef VFP_MRRC_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n", 
+	       instr, CoProc, OPC_1, Rt, Rt2, CRm);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPMCRR (ARMul_State * state, unsigned type, ARMword instr, ARMword value1, ARMword value2)
+{
+	/* MCRR<c> <coproc>,<opc1>,<Rt>,<Rt2>,<CRm> */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int OPC_1 = BITS (4, 7);
+	int Rt = BITS (12, 15);
+	int Rt2 = BITS (16, 19);
+	int CRm = BITS (0, 3);
+	
+	/* TODO check access permission */
+	
+	/* CRn/opc1 CRm/opc2 */
+	
+	if (CoProc == 11 || CoProc == 10)
+	{
+		#define VFP_MCRR_TRANS
+		#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+		#undef VFP_MCRR_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n", 
+	       instr, CoProc, OPC_1, Rt, Rt2, CRm);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPSTC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value)
+{
+	/* STC{L}<c> <coproc>,<CRd>,[<Rn>],<option> */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int CRd = BITS (12, 15);
+	int Rn = BITS (16, 19);
+	int imm8 = BITS (0, 7);
+	int P = BIT(24);
+	int U = BIT(23);
+	int D = BIT(22);
+	int W = BIT(21);
+	
+	/* TODO check access permission */
+	
+	/* VSTM */
+	if ( (P|U|D|W) == 0 )
+	{
+		DEBUG_LOG(ARM11, "In %s, UNDEFINED\n", __FUNCTION__); exit(-1);
+	}
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#if 1
+		if (P == 0 && U == 0 && W == 0)
+		{
+			DEBUG_LOG(ARM11, "VSTM Related encodings\n"); exit(-1);
+		}
+		if (P == U && W == 1)
+		{
+			DEBUG_LOG(ARM11, "UNDEFINED\n"); exit(-1);
+		}
+		#endif
+
+		#define VFP_STC_TRANS
+		#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+		#undef VFP_STC_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n", 
+	       instr, CoProc, CRd, Rn, imm8, P, U, D, W);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPLDC (ARMul_State * state, unsigned type, ARMword instr, ARMword value)
+{
+	/* LDC{L}<c> <coproc>,<CRd>,[<Rn>] */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int CRd = BITS (12, 15);
+	int Rn = BITS (16, 19);
+	int imm8 = BITS (0, 7);
+	int P = BIT(24);
+	int U = BIT(23);
+	int D = BIT(22);
+	int W = BIT(21);
+	
+	/* TODO check access permission */
+	
+	if ( (P|U|D|W) == 0 )
+	{
+		DEBUG_LOG(ARM11, "In %s, UNDEFINED\n", __FUNCTION__); exit(-1);
+	}
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#define VFP_LDC_TRANS
+		#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+		#undef VFP_LDC_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n", 
+	       instr, CoProc, CRd, Rn, imm8, P, U, D, W);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPCDP (ARMul_State * state, unsigned type, ARMword instr)
+{
+	/* CDP<c> <coproc>,<opc1>,<CRd>,<CRn>,<CRm>,<opc2> */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int OPC_1 = BITS (20, 23);
+	int CRd = BITS (12, 15);
+	int CRn = BITS (16, 19);
+	int CRm = BITS (0, 3);
+	int OPC_2 = BITS (5, 7);
+	
+	/* TODO check access permission */
+	
+	/* CRn/opc1 CRm/opc2 */
+
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#define VFP_CDP_TRANS
+		#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+		#undef VFP_CDP_TRANS
+		
+		int exceptions = 0;
+		if (CoProc == 10)
+			exceptions = vfp_single_cpdo(state, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else 
+			exceptions = vfp_double_cpdo(state, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		vfp_raise_exceptions(state, exceptions, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		return ARMul_DONE;
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x\n", instr);
+	return ARMul_CANT;
+}
+
+
+/* ----------- MRC ------------ */
+#define VFP_MRC_IMPL
+#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+#undef VFP_MRC_IMPL
+
+#define VFP_MRRC_IMPL
+#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+#undef VFP_MRRC_IMPL
+
+
+/* ----------- MCR ------------ */
+#define VFP_MCR_IMPL
+#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+#undef VFP_MCR_IMPL
+
+#define VFP_MCRR_IMPL
+#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+#undef VFP_MCRR_IMPL
+
+/* Memory operation are not inlined, as old Interpreter and Fast interpreter
+   don't have the same memory operation interface.
+   Old interpreter framework does one access to coprocessor per data, and
+   handles already data write, as well as address computation,
+   which is not the case for Fast interpreter. Therefore, implementation
+   of vfp instructions in old interpreter and fast interpreter are separate. */
+
+/* ----------- STC ------------ */
+#define VFP_STC_IMPL
+#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+#undef VFP_STC_IMPL
+
+
+/* ----------- LDC ------------ */
+#define VFP_LDC_IMPL
+#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+#undef VFP_LDC_IMPL
+
+
+/* ----------- CDP ------------ */
+#define VFP_CDP_IMPL
+#include "core/arm/interpreter/vfp/vfpinstr.cpp"
+#undef VFP_CDP_IMPL
+
+/* Miscellaneous functions */
+int32_t vfp_get_float(arm_core_t* state, unsigned int reg)
+{
+	DBG("VFP get float: s%d=[%08x]\n", reg, state->ExtReg[reg]);
+	return state->ExtReg[reg];
+}
+
+void vfp_put_float(arm_core_t* state, int32_t val, unsigned int reg)
+{
+	DBG("VFP put float: s%d <= [%08x]\n", reg, val);
+	state->ExtReg[reg] = val;
+}
+
+uint64_t vfp_get_double(arm_core_t* state, unsigned int reg)
+{
+	uint64_t result;
+	result = ((uint64_t) state->ExtReg[reg*2+1])<<32 | state->ExtReg[reg*2];
+	DBG("VFP get double: s[%d-%d]=[%016llx]\n", reg*2+1, reg*2, result);
+	return result;
+}
+
+void vfp_put_double(arm_core_t* state, uint64_t val, unsigned int reg)
+{
+	DBG("VFP put double: s[%d-%d] <= [%08x-%08x]\n", reg*2+1, reg*2, (uint32_t) (val>>32), (uint32_t) (val & 0xffffffff));
+	state->ExtReg[reg*2] = (uint32_t) (val & 0xffffffff);
+	state->ExtReg[reg*2+1] = (uint32_t) (val>>32);
+}
+
+
+
+/*
+ * Process bitmask of exception conditions. (from vfpmodule.c)
+ */
+void vfp_raise_exceptions(ARMul_State* state, u32 exceptions, u32 inst, u32 fpscr)
+{
+	int si_code = 0;
+
+	vfpdebug("VFP: raising exceptions %08x\n", exceptions);
+
+	if (exceptions == VFP_EXCEPTION_ERROR) {
+		DEBUG_LOG(ARM11, "unhandled bounce %x\n", inst);
+		exit(-1);
+		return;
+	}
+
+	/*
+	 * If any of the status flags are set, update the FPSCR.
+	 * Comparison instructions always return at least one of
+	 * these flags set.
+	 */
+	if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
+		fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V);
+
+	fpscr |= exceptions;
+
+	state->VFP[VFP_OFFSET(VFP_FPSCR)] = fpscr;
+}
diff --git a/src/core/arm/interpreter/vfp/vfp.h b/src/core/arm/interpreter/vfp/vfp.h
new file mode 100644
index 000000000..f738a615b
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfp.h
@@ -0,0 +1,111 @@
+/* 
+    vfp/vfp.h - ARM VFPv3 emulation unit - vfp interface
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifndef __VFP_H__
+#define __VFP_H__
+
+#define DBG(...) DEBUG_LOG(ARM11, __VA_ARGS__)
+
+#define vfpdebug //printf
+
+#include "core/arm/interpreter/vfp/vfp_helper.h" /* for references to cdp SoftFloat functions */
+
+unsigned VFPInit (ARMul_State *state);
+unsigned VFPMRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value);
+unsigned VFPMCR (ARMul_State * state, unsigned type, ARMword instr, ARMword value);
+unsigned VFPMRRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value1, ARMword * value2);
+unsigned VFPMCRR (ARMul_State * state, unsigned type, ARMword instr, ARMword value1, ARMword value2);
+unsigned VFPSTC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value);
+unsigned VFPLDC (ARMul_State * state, unsigned type, ARMword instr, ARMword value);
+unsigned VFPCDP (ARMul_State * state, unsigned type, ARMword instr);
+
+/* FPSID Information */
+#define VFP_FPSID_IMPLMEN 0 	/* should be the same as cp15 0 c0 0*/
+#define VFP_FPSID_SW 0
+#define VFP_FPSID_SUBARCH 0x2 	/* VFP version. Current is v3 (not strict) */
+#define VFP_FPSID_PARTNUM 0x1
+#define VFP_FPSID_VARIANT 0x1
+#define VFP_FPSID_REVISION 0x1
+
+/* FPEXC Flags */
+#define VFP_FPEXC_EX 1<<31
+#define VFP_FPEXC_EN 1<<30
+
+/* FPSCR Flags */
+#define VFP_FPSCR_NFLAG 1<<31
+#define VFP_FPSCR_ZFLAG 1<<30
+#define VFP_FPSCR_CFLAG 1<<29
+#define VFP_FPSCR_VFLAG 1<<28
+
+#define VFP_FPSCR_AHP 1<<26 	/* Alternative Half Precision */
+#define VFP_FPSCR_DN 1<<25 	/* Default NaN */
+#define VFP_FPSCR_FZ 1<<24 	/* Flush-to-zero */
+#define VFP_FPSCR_RMODE 3<<22 	/* Rounding Mode */
+#define VFP_FPSCR_STRIDE 3<<20 	/* Stride (vector) */
+#define VFP_FPSCR_LEN 7<<16 	/* Stride (vector) */
+
+#define VFP_FPSCR_IDE 1<<15	/* Input Denormal exc */
+#define VFP_FPSCR_IXE 1<<12	/* Inexact exc */
+#define VFP_FPSCR_UFE 1<<11	/* Undeflow exc */
+#define VFP_FPSCR_OFE 1<<10	/* Overflow exc */
+#define VFP_FPSCR_DZE 1<<9	/* Division by Zero exc */
+#define VFP_FPSCR_IOE 1<<8	/* Invalid Operation exc */
+
+#define VFP_FPSCR_IDC 1<<7	/* Input Denormal cum exc */
+#define VFP_FPSCR_IXC 1<<4	/* Inexact cum exc */
+#define VFP_FPSCR_UFC 1<<3	/* Undeflow cum exc */
+#define VFP_FPSCR_OFC 1<<2	/* Overflow cum exc */
+#define VFP_FPSCR_DZC 1<<1	/* Division by Zero cum exc */
+#define VFP_FPSCR_IOC 1<<0	/* Invalid Operation cum exc */
+
+/* Inline instructions. Note: Used in a cpp file as well */
+#ifdef __cplusplus
+ extern "C" {
+#endif
+int32_t vfp_get_float(ARMul_State * state, unsigned int reg);
+void vfp_put_float(ARMul_State * state, int32_t val, unsigned int reg);
+uint64_t vfp_get_double(ARMul_State * state, unsigned int reg);
+void vfp_put_double(ARMul_State * state, uint64_t val, unsigned int reg);
+void vfp_raise_exceptions(ARMul_State * state, uint32_t exceptions, uint32_t inst, uint32_t fpscr);
+u32 vfp_single_cpdo(ARMul_State* state, u32 inst, u32 fpscr);
+u32 vfp_double_cpdo(ARMul_State* state, u32 inst, u32 fpscr);
+
+/* MRC */
+inline void VMRS(ARMul_State * state, ARMword reg, ARMword Rt, ARMword *value);
+inline void VMOVBRS(ARMul_State * state, ARMword to_arm, ARMword t, ARMword n, ARMword *value);
+inline void VMOVBRRD(ARMul_State * state, ARMword to_arm, ARMword t, ARMword t2, ARMword n, ARMword *value1, ARMword *value2);
+inline void VMOVI(ARMul_State * state, ARMword single, ARMword d, ARMword imm);
+inline void VMOVR(ARMul_State * state, ARMword single, ARMword d, ARMword imm);
+/* MCR */
+inline void VMSR(ARMul_State * state, ARMword reg, ARMword Rt);
+/* STC */
+inline int VSTM(ARMul_State * state, int type, ARMword instr, ARMword* value);
+inline int VPUSH(ARMul_State * state, int type, ARMword instr, ARMword* value);
+inline int VSTR(ARMul_State * state, int type, ARMword instr, ARMword* value);
+/* LDC */
+inline int VLDM(ARMul_State * state, int type, ARMword instr, ARMword value);
+inline int VPOP(ARMul_State * state, int type, ARMword instr, ARMword value);
+inline int VLDR(ARMul_State * state, int type, ARMword instr, ARMword value);
+
+#ifdef __cplusplus
+ }
+#endif
+
+#endif
diff --git a/src/core/arm/interpreter/vfp/vfp_helper.h b/src/core/arm/interpreter/vfp/vfp_helper.h
new file mode 100644
index 000000000..80f9a93f4
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfp_helper.h
@@ -0,0 +1,541 @@
+/*
+    vfp/vfp.h - ARM VFPv3 emulation unit - SoftFloat lib helper
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/* 
+ *  The following code is derivative from Linux Android kernel vfp
+ *  floating point support.
+ * 
+ *  Copyright (C) 2004 ARM Limited.
+ *  Written by Deep Blue Solutions Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __VFP_HELPER_H__
+#define __VFP_HELPER_H__
+
+/* Custom edit */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "core/arm/interpreter/armdefs.h"
+
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+#define s16 int16_t
+#define s32 int32_t
+#define s64 int64_t
+
+#define pr_info //printf
+#define pr_debug //printf
+
+static u32 fls(int x);
+#define do_div(n, base) {n/=base;}
+
+/* From vfpinstr.h */
+
+#define INST_CPRTDO(inst)	(((inst) & 0x0f000000) == 0x0e000000)
+#define INST_CPRT(inst)		((inst) & (1 << 4))
+#define INST_CPRT_L(inst)	((inst) & (1 << 20))
+#define INST_CPRT_Rd(inst)	(((inst) & (15 << 12)) >> 12)
+#define INST_CPRT_OP(inst)	(((inst) >> 21) & 7)
+#define INST_CPNUM(inst)	((inst) & 0xf00)
+#define CPNUM(cp)		((cp) << 8)
+
+#define FOP_MASK	(0x00b00040)
+#define FOP_FMAC	(0x00000000)
+#define FOP_FNMAC	(0x00000040)
+#define FOP_FMSC	(0x00100000)
+#define FOP_FNMSC	(0x00100040)
+#define FOP_FMUL	(0x00200000)
+#define FOP_FNMUL	(0x00200040)
+#define FOP_FADD	(0x00300000)
+#define FOP_FSUB	(0x00300040)
+#define FOP_FDIV	(0x00800000)
+#define FOP_EXT		(0x00b00040)
+
+#define FOP_TO_IDX(inst)	((inst & 0x00b00000) >> 20 | (inst & (1 << 6)) >> 4)
+
+#define FEXT_MASK	(0x000f0080)
+#define FEXT_FCPY	(0x00000000)
+#define FEXT_FABS	(0x00000080)
+#define FEXT_FNEG	(0x00010000)
+#define FEXT_FSQRT	(0x00010080)
+#define FEXT_FCMP	(0x00040000)
+#define FEXT_FCMPE	(0x00040080)
+#define FEXT_FCMPZ	(0x00050000)
+#define FEXT_FCMPEZ	(0x00050080)
+#define FEXT_FCVT	(0x00070080)
+#define FEXT_FUITO	(0x00080000)
+#define FEXT_FSITO	(0x00080080)
+#define FEXT_FTOUI	(0x000c0000)
+#define FEXT_FTOUIZ	(0x000c0080)
+#define FEXT_FTOSI	(0x000d0000)
+#define FEXT_FTOSIZ	(0x000d0080)
+
+#define FEXT_TO_IDX(inst)	((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7)
+
+#define vfp_get_sd(inst)	((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22)
+#define vfp_get_dd(inst)	((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18)
+#define vfp_get_sm(inst)	((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5)
+#define vfp_get_dm(inst)	((inst & 0x0000000f) | (inst & (1 << 5)) >> 1)
+#define vfp_get_sn(inst)	((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7)
+#define vfp_get_dn(inst)	((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3)
+
+#define vfp_single(inst)	(((inst) & 0x0000f00) == 0xa00)
+
+#define FPSCR_N	(1 << 31)
+#define FPSCR_Z	(1 << 30)
+#define FPSCR_C (1 << 29)
+#define FPSCR_V	(1 << 28)
+
+/* -------------- */
+
+/* From asm/include/vfp.h */
+
+/* FPSCR bits */
+#define FPSCR_DEFAULT_NAN	(1<<25)
+#define FPSCR_FLUSHTOZERO	(1<<24)
+#define FPSCR_ROUND_NEAREST	(0<<22)
+#define FPSCR_ROUND_PLUSINF	(1<<22)
+#define FPSCR_ROUND_MINUSINF	(2<<22)
+#define FPSCR_ROUND_TOZERO	(3<<22)
+#define FPSCR_RMODE_BIT		(22)
+#define FPSCR_RMODE_MASK	(3 << FPSCR_RMODE_BIT)
+#define FPSCR_STRIDE_BIT	(20)
+#define FPSCR_STRIDE_MASK	(3 << FPSCR_STRIDE_BIT)
+#define FPSCR_LENGTH_BIT	(16)
+#define FPSCR_LENGTH_MASK	(7 << FPSCR_LENGTH_BIT)
+#define FPSCR_IOE		(1<<8)
+#define FPSCR_DZE		(1<<9)
+#define FPSCR_OFE		(1<<10)
+#define FPSCR_UFE		(1<<11)
+#define FPSCR_IXE		(1<<12)
+#define FPSCR_IDE		(1<<15)
+#define FPSCR_IOC		(1<<0)
+#define FPSCR_DZC		(1<<1)
+#define FPSCR_OFC		(1<<2)
+#define FPSCR_UFC		(1<<3)
+#define FPSCR_IXC		(1<<4)
+#define FPSCR_IDC		(1<<7)
+
+/* ---------------- */
+
+static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift)
+{
+	if (shift) {
+		if (shift < 32)
+			val = val >> shift | ((val << (32 - shift)) != 0);
+		else
+			val = val != 0;
+	}
+	return val;
+}
+
+static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift)
+{
+	if (shift) {
+		if (shift < 64)
+			val = val >> shift | ((val << (64 - shift)) != 0);
+		else
+			val = val != 0;
+	}
+	return val;
+}
+
+static inline u32 vfp_hi64to32jamming(u64 val)
+{
+	u32 v;
+	u32 highval = val >> 32;
+	u32 lowval = val & 0xffffffff;
+
+	if (lowval >= 1)
+		v = highval | 1;
+	else
+		v = highval;
+
+	return v;
+}
+
+static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
+{
+	*resl = nl + ml;
+	*resh = nh + mh;
+	if (*resl < nl)
+		*resh += 1;
+}
+
+static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
+{
+	*resl = nl - ml;
+	*resh = nh - mh;
+	if (*resl > nl)
+		*resh -= 1;
+}
+
+static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m)
+{
+	u32 nh, nl, mh, ml;
+	u64 rh, rma, rmb, rl;
+
+	nl = n;
+	ml = m;
+	rl = (u64)nl * ml;
+
+	nh = n >> 32;
+	rma = (u64)nh * ml;
+
+	mh = m >> 32;
+	rmb = (u64)nl * mh;
+	rma += rmb;
+
+	rh = (u64)nh * mh;
+	rh += ((u64)(rma < rmb) << 32) + (rma >> 32);
+
+	rma <<= 32;
+	rl += rma;
+	rh += (rl < rma);
+
+	*resl = rl;
+	*resh = rh;
+}
+
+static inline void shift64left(u64 *resh, u64 *resl, u64 n)
+{
+	*resh = n >> 63;
+	*resl = n << 1;
+}
+
+static inline u64 vfp_hi64multiply64(u64 n, u64 m)
+{
+	u64 rh, rl;
+	mul64to128(&rh, &rl, n, m);
+	return rh | (rl != 0);
+}
+
+static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m)
+{
+	u64 mh, ml, remh, reml, termh, terml, z;
+
+	if (nh >= m)
+		return ~0ULL;
+	mh = m >> 32;
+	if (mh << 32 <= nh) {
+		z = 0xffffffff00000000ULL;
+	} else {
+		z = nh;
+		do_div(z, mh);
+		z <<= 32;
+	}
+	mul64to128(&termh, &terml, m, z);
+	sub128(&remh, &reml, nh, nl, termh, terml);
+	ml = m << 32;
+	while ((s64)remh < 0) {
+		z -= 0x100000000ULL;
+		add128(&remh, &reml, remh, reml, mh, ml);
+	}
+	remh = (remh << 32) | (reml >> 32);
+	if (mh << 32 <= remh) {
+		z |= 0xffffffff;
+	} else {
+		do_div(remh, mh);
+		z |= remh;
+	}
+	return z;
+}
+
+/*
+ * Operations on unpacked elements
+ */
+#define vfp_sign_negate(sign)	(sign ^ 0x8000)
+
+/*
+ * Single-precision
+ */
+struct vfp_single {
+	s16	exponent;
+	u16	sign;
+	u32	significand;
+};
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+extern s32 vfp_get_float(ARMul_State * state, unsigned int reg);
+extern void vfp_put_float(ARMul_State * state, s32 val, unsigned int reg);
+#ifdef __cplusplus
+ }
+#endif
+
+/*
+ * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa
+ * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent
+ * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand
+ *  which are not propagated to the float upon packing.
+ */
+#define VFP_SINGLE_MANTISSA_BITS	(23)
+#define VFP_SINGLE_EXPONENT_BITS	(8)
+#define VFP_SINGLE_LOW_BITS		(32 - VFP_SINGLE_MANTISSA_BITS - 2)
+#define VFP_SINGLE_LOW_BITS_MASK	((1 << VFP_SINGLE_LOW_BITS) - 1)
+
+/*
+ * The bit in an unpacked float which indicates that it is a quiet NaN
+ */
+#define VFP_SINGLE_SIGNIFICAND_QNAN	(1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS))
+
+/*
+ * Operations on packed single-precision numbers
+ */
+#define vfp_single_packed_sign(v)	((v) & 0x80000000)
+#define vfp_single_packed_negate(v)	((v) ^ 0x80000000)
+#define vfp_single_packed_abs(v)	((v) & ~0x80000000)
+#define vfp_single_packed_exponent(v)	(((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1))
+#define vfp_single_packed_mantissa(v)	((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1))
+
+/*
+ * Unpack a single-precision float.  Note that this returns the magnitude
+ * of the single-precision float mantissa with the 1. if necessary,
+ * aligned to bit 30.
+ */
+static inline void vfp_single_unpack(struct vfp_single *s, s32 val)
+{
+	u32 significand;
+
+	s->sign = vfp_single_packed_sign(val) >> 16,
+	s->exponent = vfp_single_packed_exponent(val);
+
+	significand = (u32) val;
+	significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
+	if (s->exponent && s->exponent != 255)
+		significand |= 0x40000000;
+	s->significand = significand;
+}
+
+/*
+ * Re-pack a single-precision float.  This assumes that the float is
+ * already normalised such that the MSB is bit 30, _not_ bit 31.
+ */
+static inline s32 vfp_single_pack(struct vfp_single *s)
+{
+	u32 val;
+	val = (s->sign << 16) +
+	      (s->exponent << VFP_SINGLE_MANTISSA_BITS) +
+	      (s->significand >> VFP_SINGLE_LOW_BITS);
+	return (s32)val;
+}
+
+#define VFP_NUMBER		(1<<0)
+#define VFP_ZERO		(1<<1)
+#define VFP_DENORMAL		(1<<2)
+#define VFP_INFINITY		(1<<3)
+#define VFP_NAN			(1<<4)
+#define VFP_NAN_SIGNAL		(1<<5)
+
+#define VFP_QNAN		(VFP_NAN)
+#define VFP_SNAN		(VFP_NAN|VFP_NAN_SIGNAL)
+
+static inline int vfp_single_type(struct vfp_single *s)
+{
+	int type = VFP_NUMBER;
+	if (s->exponent == 255) {
+		if (s->significand == 0)
+			type = VFP_INFINITY;
+		else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN)
+			type = VFP_QNAN;
+		else
+			type = VFP_SNAN;
+	} else if (s->exponent == 0) {
+		if (s->significand == 0)
+			type |= VFP_ZERO;
+		else
+			type |= VFP_DENORMAL;
+	}
+	return type;
+}
+
+
+u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func);
+
+/*
+ * Double-precision
+ */
+struct vfp_double {
+	s16	exponent;
+	u16	sign;
+	u64	significand;
+};
+
+/*
+ * VFP_REG_ZERO is a special register number for vfp_get_double
+ * which returns (double)0.0.  This is useful for the compare with
+ * zero instructions.
+ */
+#ifdef CONFIG_VFPv3
+#define VFP_REG_ZERO	32
+#else
+#define VFP_REG_ZERO	16
+#endif
+#ifdef __cplusplus
+ extern "C" {
+#endif
+extern u64 vfp_get_double(ARMul_State * state, unsigned int reg);
+extern void vfp_put_double(ARMul_State * state, u64 val, unsigned int reg);
+#ifdef __cplusplus
+ }
+#endif
+#define VFP_DOUBLE_MANTISSA_BITS	(52)
+#define VFP_DOUBLE_EXPONENT_BITS	(11)
+#define VFP_DOUBLE_LOW_BITS		(64 - VFP_DOUBLE_MANTISSA_BITS - 2)
+#define VFP_DOUBLE_LOW_BITS_MASK	((1 << VFP_DOUBLE_LOW_BITS) - 1)
+
+/*
+ * The bit in an unpacked double which indicates that it is a quiet NaN
+ */
+#define VFP_DOUBLE_SIGNIFICAND_QNAN	(1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS))
+
+/*
+ * Operations on packed single-precision numbers
+ */
+#define vfp_double_packed_sign(v)	((v) & (1ULL << 63))
+#define vfp_double_packed_negate(v)	((v) ^ (1ULL << 63))
+#define vfp_double_packed_abs(v)	((v) & ~(1ULL << 63))
+#define vfp_double_packed_exponent(v)	(((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1))
+#define vfp_double_packed_mantissa(v)	((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1))
+
+/*
+ * Unpack a double-precision float.  Note that this returns the magnitude
+ * of the double-precision float mantissa with the 1. if necessary,
+ * aligned to bit 62.
+ */
+static inline void vfp_double_unpack(struct vfp_double *s, s64 val)
+{
+	u64 significand;
+
+	s->sign = vfp_double_packed_sign(val) >> 48;
+	s->exponent = vfp_double_packed_exponent(val);
+
+	significand = (u64) val;
+	significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
+	if (s->exponent && s->exponent != 2047)
+		significand |= (1ULL << 62);
+	s->significand = significand;
+}
+
+/*
+ * Re-pack a double-precision float.  This assumes that the float is
+ * already normalised such that the MSB is bit 30, _not_ bit 31.
+ */
+static inline s64 vfp_double_pack(struct vfp_double *s)
+{
+	u64 val;
+	val = ((u64)s->sign << 48) +
+	      ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) +
+	      (s->significand >> VFP_DOUBLE_LOW_BITS);
+	return (s64)val;
+}
+
+static inline int vfp_double_type(struct vfp_double *s)
+{
+	int type = VFP_NUMBER;
+	if (s->exponent == 2047) {
+		if (s->significand == 0)
+			type = VFP_INFINITY;
+		else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN)
+			type = VFP_QNAN;
+		else
+			type = VFP_SNAN;
+	} else if (s->exponent == 0) {
+		if (s->significand == 0)
+			type |= VFP_ZERO;
+		else
+			type |= VFP_DENORMAL;
+	}
+	return type;
+}
+
+u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func);
+
+u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand);
+
+/*
+ * A special flag to tell the normalisation code not to normalise.
+ */
+#define VFP_NAN_FLAG	0x100
+
+/*
+ * A bit pattern used to indicate the initial (unset) value of the
+ * exception mask, in case nothing handles an instruction.  This
+ * doesn't include the NAN flag, which get masked out before
+ * we check for an error.
+ */
+#define VFP_EXCEPTION_ERROR	((u32)-1 & ~VFP_NAN_FLAG)
+
+/*
+ * A flag to tell vfp instruction type.
+ *  OP_SCALAR - this operation always operates in scalar mode
+ *  OP_SD - the instruction exceptionally writes to a single precision result.
+ *  OP_DD - the instruction exceptionally writes to a double precision result.
+ *  OP_SM - the instruction exceptionally reads from a single precision operand.
+ */
+#define OP_SCALAR	(1 << 0)
+#define OP_SD		(1 << 1)
+#define OP_DD		(1 << 1)
+#define OP_SM		(1 << 2)
+
+struct op {
+	u32 (* const fn)(ARMul_State* state, int dd, int dn, int dm, u32 fpscr);
+	u32 flags;
+};
+
+static inline u32 fls(int x)
+{
+	int r = 32;
+
+	if (!x)
+		return 0;
+	if (!(x & 0xffff0000u)) {
+		x <<= 16;
+		r -= 16;
+	}
+	if (!(x & 0xff000000u)) {
+		x <<= 8;
+		r -= 8;
+	}
+	if (!(x & 0xf0000000u)) {
+		x <<= 4;
+		r -= 4;
+	}
+	if (!(x & 0xc0000000u)) {
+		x <<= 2;
+		r -= 2;
+	}
+	if (!(x & 0x80000000u)) {
+		x <<= 1;
+		r -= 1;
+	}
+	return r;
+
+}
+
+#endif
diff --git a/src/core/arm/interpreter/vfp/vfpdouble.cpp b/src/core/arm/interpreter/vfp/vfpdouble.cpp
new file mode 100644
index 000000000..cd5b5afa4
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfpdouble.cpp
@@ -0,0 +1,1263 @@
+/*
+    vfp/vfpdouble.c - ARM VFPv3 emulation unit - SoftFloat double instruction
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/*
+ * This code is derived in part from :
+ * - Android kernel
+ * - John R. Housers softfloat library, which
+ * carries the following notice:
+ *
+ * ===========================================================================
+ * This C source file is part of the SoftFloat IEC/IEEE Floating-point
+ * Arithmetic Package, Release 2.
+ *
+ * Written by John R. Hauser.  This work was made possible in part by the
+ * International Computer Science Institute, located at Suite 600, 1947 Center
+ * Street, Berkeley, California 94704.  Funding was partially provided by the
+ * National Science Foundation under grant MIP-9311980.  The original version
+ * of this code was written as part of a project to build a fixed-point vector
+ * processor in collaboration with the University of California at Berkeley,
+ * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+ * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+ * arithmetic/softfloat.html'.
+ *
+ * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+ * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+ * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+ * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+ * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+ *
+ * Derivative works are acceptable, even for commercial purposes, so long as
+ * (1) they include prominent notice that the work is derivative, and (2) they
+ * include prominent notice akin to these three paragraphs for those parts of
+ * this code that are retained.
+ * ===========================================================================
+ */
+ 
+#include "core/arm/interpreter/vfp/vfp.h"
+#include "core/arm/interpreter/vfp/vfp_helper.h"
+#include "core/arm/interpreter/vfp/asm_vfp.h"
+
+static struct vfp_double vfp_double_default_qnan = {
+	//.exponent	= 2047,
+	//.sign		= 0,
+	//.significand	= VFP_DOUBLE_SIGNIFICAND_QNAN,
+};
+
+static void vfp_double_dump(const char *str, struct vfp_double *d)
+{
+	pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
+		 str, d->sign != 0, d->exponent, d->significand);
+}
+
+static void vfp_double_normalise_denormal(struct vfp_double *vd)
+{
+	int bits = 31 - fls(vd->significand >> 32);
+	if (bits == 31)
+		bits = 63 - fls(vd->significand);
+
+	vfp_double_dump("normalise_denormal: in", vd);
+
+	if (bits) {
+		vd->exponent -= bits - 1;
+		vd->significand <<= bits;
+	}
+
+	vfp_double_dump("normalise_denormal: out", vd);
+}
+
+u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
+{
+	u64 significand, incr;
+	int exponent, shift, underflow;
+	u32 rmode;
+
+	vfp_double_dump("pack: in", vd);
+
+	/*
+	 * Infinities and NaNs are a special case.
+	 */
+	if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
+		goto pack;
+
+	/*
+	 * Special-case zero.
+	 */
+	if (vd->significand == 0) {
+		vd->exponent = 0;
+		goto pack;
+	}
+
+	exponent = vd->exponent;
+	significand = vd->significand;
+
+	shift = 32 - fls(significand >> 32);
+	if (shift == 32)
+		shift = 64 - fls(significand);
+	if (shift) {
+		exponent -= shift;
+		significand <<= shift;
+	}
+
+#if 1
+	vd->exponent = exponent;
+	vd->significand = significand;
+	vfp_double_dump("pack: normalised", vd);
+#endif
+
+	/*
+	 * Tiny number?
+	 */
+	underflow = exponent < 0;
+	if (underflow) {
+		significand = vfp_shiftright64jamming(significand, -exponent);
+		exponent = 0;
+#if 1
+		vd->exponent = exponent;
+		vd->significand = significand;
+		vfp_double_dump("pack: tiny number", vd);
+#endif
+		if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
+			underflow = 0;
+	}
+
+	/*
+	 * Select rounding increment.
+	 */
+	incr = 0;
+	rmode = fpscr & FPSCR_RMODE_MASK;
+
+	if (rmode == FPSCR_ROUND_NEAREST) {
+		incr = 1ULL << VFP_DOUBLE_LOW_BITS;
+		if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
+			incr -= 1;
+	} else if (rmode == FPSCR_ROUND_TOZERO) {
+		incr = 0;
+	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
+		incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;
+
+	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);
+
+	/*
+	 * Is our rounding going to overflow?
+	 */
+	if ((significand + incr) < significand) {
+		exponent += 1;
+		significand = (significand >> 1) | (significand & 1);
+		incr >>= 1;
+#if 1
+		vd->exponent = exponent;
+		vd->significand = significand;
+		vfp_double_dump("pack: overflow", vd);
+#endif
+	}
+
+	/*
+	 * If any of the low bits (which will be shifted out of the
+	 * number) are non-zero, the result is inexact.
+	 */
+	if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
+		exceptions |= FPSCR_IXC;
+
+	/*
+	 * Do our rounding.
+	 */
+	significand += incr;
+
+	/*
+	 * Infinity?
+	 */
+	if (exponent >= 2046) {
+		exceptions |= FPSCR_OFC | FPSCR_IXC;
+		if (incr == 0) {
+			vd->exponent = 2045;
+			vd->significand = 0x7fffffffffffffffULL;
+		} else {
+			vd->exponent = 2047;		/* infinity */
+			vd->significand = 0;
+		}
+	} else {
+		if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
+			exponent = 0;
+		if (exponent || significand > 0x8000000000000000ULL)
+			underflow = 0;
+		if (underflow)
+			exceptions |= FPSCR_UFC;
+		vd->exponent = exponent;
+		vd->significand = significand >> 1;
+	}
+
+ pack:
+	vfp_double_dump("pack: final", vd);
+	{
+		s64 d = vfp_double_pack(vd);
+		pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
+			 dd, d, exceptions);
+		vfp_put_double(state, d, dd);
+	}
+	return exceptions;
+}
+
+/*
+ * Propagate the NaN, setting exceptions if it is signalling.
+ * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
+ */
+static u32
+vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
+		  struct vfp_double *vdm, u32 fpscr)
+{
+	struct vfp_double *nan;
+	int tn, tm = 0;
+
+	tn = vfp_double_type(vdn);
+
+	if (vdm)
+		tm = vfp_double_type(vdm);
+
+	if (fpscr & FPSCR_DEFAULT_NAN)
+		/*
+		 * Default NaN mode - always returns a quiet NaN
+		 */
+		nan = &vfp_double_default_qnan;
+	else {
+		/*
+		 * Contemporary mode - select the first signalling
+		 * NAN, or if neither are signalling, the first
+		 * quiet NAN.
+		 */
+		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
+			nan = vdn;
+		else
+			nan = vdm;
+		/*
+		 * Make the NaN quiet.
+		 */
+		nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
+	}
+
+	*vdd = *nan;
+
+	/*
+	 * If one was a signalling NAN, raise invalid operation.
+	 */
+	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
+}
+
+/*
+ * Extended operations
+ */
+static u32 vfp_double_fabs(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_put_double(state, vfp_double_packed_abs(vfp_get_double(state, dm)), dd);
+	return 0;
+}
+
+static u32 vfp_double_fcpy(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_put_double(state, vfp_get_double(state, dm), dd);
+	return 0;
+}
+
+static u32 vfp_double_fneg(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_put_double(state, vfp_double_packed_negate(vfp_get_double(state, dm)), dd);
+	return 0;
+}
+
+static u32 vfp_double_fsqrt(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	struct vfp_double vdm, vdd, *vdp;
+	int ret, tm;
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	tm = vfp_double_type(&vdm);
+	if (tm & (VFP_NAN|VFP_INFINITY)) {
+		vdp = &vdd;
+
+		if (tm & VFP_NAN)
+			ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
+		else if (vdm.sign == 0) {
+ sqrt_copy:
+			vdp = &vdm;
+			ret = 0;
+		} else {
+ sqrt_invalid:
+			vdp = &vfp_double_default_qnan;
+			ret = FPSCR_IOC;
+		}
+		vfp_put_double(state, vfp_double_pack(vdp), dd);
+		return ret;
+	}
+
+	/*
+	 * sqrt(+/- 0) == +/- 0
+	 */
+	if (tm & VFP_ZERO)
+		goto sqrt_copy;
+
+	/*
+	 * Normalise a denormalised number
+	 */
+	if (tm & VFP_DENORMAL)
+		vfp_double_normalise_denormal(&vdm);
+
+	/*
+	 * sqrt(<0) = invalid
+	 */
+	if (vdm.sign)
+		goto sqrt_invalid;
+
+	vfp_double_dump("sqrt", &vdm);
+
+	/*
+	 * Estimate the square root.
+	 */
+	vdd.sign = 0;
+	vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
+	vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;
+
+	vfp_double_dump("sqrt estimate1", &vdd);
+
+	vdm.significand >>= 1 + (vdm.exponent & 1);
+	vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);
+
+	vfp_double_dump("sqrt estimate2", &vdd);
+
+	/*
+	 * And now adjust.
+	 */
+	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
+		if (vdd.significand < 2) {
+			vdd.significand = ~0ULL;
+		} else {
+			u64 termh, terml, remh, reml;
+			vdm.significand <<= 2;
+			mul64to128(&termh, &terml, vdd.significand, vdd.significand);
+			sub128(&remh, &reml, vdm.significand, 0, termh, terml);
+			while ((s64)remh < 0) {
+				vdd.significand -= 1;
+				shift64left(&termh, &terml, vdd.significand);
+				terml |= 1;
+				add128(&remh, &reml, remh, reml, termh, terml);
+			}
+			vdd.significand |= (remh | reml) != 0;
+		}
+	}
+	vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, 0, "fsqrt");
+}
+
+/*
+ * Equal	:= ZC
+ * Less than	:= N
+ * Greater than	:= C
+ * Unordered	:= CV
+ */
+static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u32 fpscr)
+{
+	s64 d, m;
+	u32 ret = 0;
+
+	pr_debug("In %s, state=0x%x, fpscr=0x%x\n", __FUNCTION__, state, fpscr);
+	m = vfp_get_double(state, dm);
+	if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
+		ret |= FPSCR_C | FPSCR_V;
+		if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
+			/*
+			 * Signalling NaN, or signalling on quiet NaN
+			 */
+			ret |= FPSCR_IOC;
+	}
+
+	d = vfp_get_double(state, dd);
+	if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
+		ret |= FPSCR_C | FPSCR_V;
+		if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
+			/*
+			 * Signalling NaN, or signalling on quiet NaN
+			 */
+			ret |= FPSCR_IOC;
+	}
+
+	if (ret == 0) {
+		//printf("In %s, d=%lld, m =%lld\n ", __FUNCTION__, d, m);
+		if (d == m || vfp_double_packed_abs(d | m) == 0) {
+			/*
+			 * equal
+			 */
+			ret |= FPSCR_Z | FPSCR_C;
+			//printf("In %s,1 ret=0x%x\n", __FUNCTION__, ret);
+		} else if (vfp_double_packed_sign(d ^ m)) {
+			/*
+			 * different signs
+			 */
+			if (vfp_double_packed_sign(d))
+				/*
+				 * d is negative, so d < m
+				 */
+				ret |= FPSCR_N;
+			else
+				/*
+				 * d is positive, so d > m
+				 */
+				ret |= FPSCR_C;
+		} else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
+			/*
+			 * d < m
+			 */
+			ret |= FPSCR_N;
+		} else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
+			/*
+			 * d > m
+			 */
+			ret |= FPSCR_C;
+		}
+	}
+	pr_debug("In %s, state=0x%x, ret=0x%x\n", __FUNCTION__, state, ret);
+
+	return ret;
+}
+
+static u32 vfp_double_fcmp(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_compare(state, dd, 0, dm, fpscr);
+}
+
+static u32 vfp_double_fcmpe(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_compare(state, dd, 1, dm, fpscr);
+}
+
+static u32 vfp_double_fcmpz(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_compare(state, dd, 0, VFP_REG_ZERO, fpscr);
+}
+
+static u32 vfp_double_fcmpez(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_compare(state, dd, 1, VFP_REG_ZERO, fpscr);
+}
+
+static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{
+	struct vfp_double vdm;
+	struct vfp_single vsd;
+	int tm;
+	u32 exceptions = 0;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+
+	tm = vfp_double_type(&vdm);
+
+	/*
+	 * If we have a signalling NaN, signal invalid operation.
+	 */
+	if (tm == VFP_SNAN)
+		exceptions = FPSCR_IOC;
+
+	if (tm & VFP_DENORMAL)
+		vfp_double_normalise_denormal(&vdm);
+
+	vsd.sign = vdm.sign;
+	vsd.significand = vfp_hi64to32jamming(vdm.significand);
+
+	/*
+	 * If we have an infinity or a NaN, the exponent must be 255
+	 */
+	if (tm & (VFP_INFINITY|VFP_NAN)) {
+		vsd.exponent = 255;
+		if (tm == VFP_QNAN)
+			vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
+		goto pack_nan;
+	} else if (tm & VFP_ZERO)
+		vsd.exponent = 0;
+	else
+		vsd.exponent = vdm.exponent - (1023 - 127);
+
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fcvts");
+
+ pack_nan:
+	vfp_put_float(state, vfp_single_pack(&vsd), sd);
+	return exceptions;
+}
+
+static u32 vfp_double_fuito(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	struct vfp_double vdm;
+	u32 m = vfp_get_float(state, dm);
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vdm.sign = 0;
+	vdm.exponent = 1023 + 63 - 1;
+	vdm.significand = (u64)m;
+
+	return vfp_double_normaliseround(state, dd, &vdm, fpscr, 0, "fuito");
+}
+
+static u32 vfp_double_fsito(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	struct vfp_double vdm;
+	u32 m = vfp_get_float(state, dm);
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vdm.sign = (m & 0x80000000) >> 16;
+	vdm.exponent = 1023 + 63 - 1;
+	vdm.significand = vdm.sign ? -m : m;
+
+	return vfp_double_normaliseround(state, dd, &vdm, fpscr, 0, "fsito");
+}
+
+static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{
+	struct vfp_double vdm;
+	u32 d, exceptions = 0;
+	int rmode = fpscr & FPSCR_RMODE_MASK;
+	int tm;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+
+	/*
+	 * Do we have a denormalised number?
+	 */
+	tm = vfp_double_type(&vdm);
+	if (tm & VFP_DENORMAL)
+		exceptions |= FPSCR_IDC;
+
+	if (tm & VFP_NAN)
+		vdm.sign = 0;
+
+	if (vdm.exponent >= 1023 + 32) {
+		d = vdm.sign ? 0 : 0xffffffff;
+		exceptions = FPSCR_IOC;
+	} else if (vdm.exponent >= 1023 - 1) {
+		int shift = 1023 + 63 - vdm.exponent;
+		u64 rem, incr = 0;
+
+		/*
+		 * 2^0 <= m < 2^32-2^8
+		 */
+		d = (vdm.significand << 1) >> shift;
+		rem = vdm.significand << (65 - shift);
+
+		if (rmode == FPSCR_ROUND_NEAREST) {
+			incr = 0x8000000000000000ULL;
+			if ((d & 1) == 0)
+				incr -= 1;
+		} else if (rmode == FPSCR_ROUND_TOZERO) {
+			incr = 0;
+		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
+			incr = ~0ULL;
+		}
+
+		if ((rem + incr) < rem) {
+			if (d < 0xffffffff)
+				d += 1;
+			else
+				exceptions |= FPSCR_IOC;
+		}
+
+		if (d && vdm.sign) {
+			d = 0;
+			exceptions |= FPSCR_IOC;
+		} else if (rem)
+			exceptions |= FPSCR_IXC;
+	} else {
+		d = 0;
+		if (vdm.exponent | vdm.significand) {
+			exceptions |= FPSCR_IXC;
+			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
+				d = 1;
+			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
+				d = 0;
+				exceptions |= FPSCR_IOC;
+			}
+		}
+	}
+
+	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
+
+	vfp_put_float(state, d, sd);
+
+	return exceptions;
+}
+
+static u32 vfp_double_ftouiz(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_ftoui(state, sd, unused, dm, FPSCR_ROUND_TOZERO);
+}
+
+static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{
+	struct vfp_double vdm;
+	u32 d, exceptions = 0;
+	int rmode = fpscr & FPSCR_RMODE_MASK;
+	int tm;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	vfp_double_dump("VDM", &vdm);
+
+	/*
+	 * Do we have denormalised number?
+	 */
+	tm = vfp_double_type(&vdm);
+	if (tm & VFP_DENORMAL)
+		exceptions |= FPSCR_IDC;
+
+	if (tm & VFP_NAN) {
+		d = 0;
+		exceptions |= FPSCR_IOC;
+	} else if (vdm.exponent >= 1023 + 32) {
+		d = 0x7fffffff;
+		if (vdm.sign)
+			d = ~d;
+		exceptions |= FPSCR_IOC;
+	} else if (vdm.exponent >= 1023 - 1) {
+		int shift = 1023 + 63 - vdm.exponent;	/* 58 */
+		u64 rem, incr = 0;
+
+		d = (vdm.significand << 1) >> shift;
+		rem = vdm.significand << (65 - shift);
+
+		if (rmode == FPSCR_ROUND_NEAREST) {
+			incr = 0x8000000000000000ULL;
+			if ((d & 1) == 0)
+				incr -= 1;
+		} else if (rmode == FPSCR_ROUND_TOZERO) {
+			incr = 0;
+		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
+			incr = ~0ULL;
+		}
+
+		if ((rem + incr) < rem && d < 0xffffffff)
+			d += 1;
+		if (d > 0x7fffffff + (vdm.sign != 0)) {
+			d = 0x7fffffff + (vdm.sign != 0);
+			exceptions |= FPSCR_IOC;
+		} else if (rem)
+			exceptions |= FPSCR_IXC;
+
+		if (vdm.sign)
+			d = -d;
+	} else {
+		d = 0;
+		if (vdm.exponent | vdm.significand) {
+			exceptions |= FPSCR_IXC;
+			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
+				d = 1;
+			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
+				d = -1;
+		}
+	}
+
+	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
+
+	vfp_put_float(state, (s32)d, sd);
+
+	return exceptions;
+}
+
+static u32 vfp_double_ftosiz(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_ftosi(state, dd, unused, dm, FPSCR_ROUND_TOZERO);
+}
+
+static struct op fops_ext[] = {
+    { vfp_double_fcpy,   0 },                 //0x00000000 - FEXT_FCPY
+    { vfp_double_fabs,   0 },                 //0x00000001 - FEXT_FABS
+    { vfp_double_fneg,   0 },                 //0x00000002 - FEXT_FNEG
+    { vfp_double_fsqrt,  0 },                 //0x00000003 - FEXT_FSQRT
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_double_fcmp,   OP_SCALAR },         //0x00000008 - FEXT_FCMP
+    { vfp_double_fcmpe,  OP_SCALAR },         //0x00000009 - FEXT_FCMPE
+    { vfp_double_fcmpz,  OP_SCALAR },         //0x0000000A - FEXT_FCMPZ
+    { vfp_double_fcmpez, OP_SCALAR },         //0x0000000B - FEXT_FCMPEZ
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_double_fcvts,  OP_SCALAR|OP_DD },   //0x0000000F - FEXT_FCVT
+    { vfp_double_fuito,  OP_SCALAR },         //0x00000010 - FEXT_FUITO
+    { vfp_double_fsito,  OP_SCALAR },         //0x00000011 - FEXT_FSITO
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_double_ftoui,  OP_SCALAR },         //0x00000018 - FEXT_FTOUI
+    { vfp_double_ftouiz, OP_SCALAR },         //0x00000019 - FEXT_FTOUIZ
+    { vfp_double_ftosi,  OP_SCALAR },         //0x0000001A - FEXT_FTOSI
+    { vfp_double_ftosiz, OP_SCALAR },         //0x0000001B - FEXT_FTOSIZ
+};
+
+
+
+
+static u32
+vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
+			  struct vfp_double *vdm, u32 fpscr)
+{
+	struct vfp_double *vdp;
+	u32 exceptions = 0;
+	int tn, tm;
+
+	tn = vfp_double_type(vdn);
+	tm = vfp_double_type(vdm);
+
+	if (tn & tm & VFP_INFINITY) {
+		/*
+		 * Two infinities.  Are they different signs?
+		 */
+		if (vdn->sign ^ vdm->sign) {
+			/*
+			 * different signs -> invalid
+			 */
+			exceptions = FPSCR_IOC;
+			vdp = &vfp_double_default_qnan;
+		} else {
+			/*
+			 * same signs -> valid
+			 */
+			vdp = vdn;
+		}
+	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
+		/*
+		 * One infinity and one number -> infinity
+		 */
+		vdp = vdn;
+	} else {
+		/*
+		 * 'n' is a NaN of some type
+		 */
+		return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
+	}
+	*vdd = *vdp;
+	return exceptions;
+}
+
+static u32
+vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
+	       struct vfp_double *vdm, u32 fpscr)
+{
+	u32 exp_diff;
+	u64 m_sig;
+
+	if (vdn->significand & (1ULL << 63) ||
+	    vdm->significand & (1ULL << 63)) {
+		pr_info("VFP: bad FP values\n");
+		vfp_double_dump("VDN", vdn);
+		vfp_double_dump("VDM", vdm);
+	}
+
+	/*
+	 * Ensure that 'n' is the largest magnitude number.  Note that
+	 * if 'n' and 'm' have equal exponents, we do not swap them.
+	 * This ensures that NaN propagation works correctly.
+	 */
+	if (vdn->exponent < vdm->exponent) {
+		struct vfp_double *t = vdn;
+		vdn = vdm;
+		vdm = t;
+	}
+
+	/*
+	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
+	 * infinity or a NaN here.
+	 */
+	if (vdn->exponent == 2047)
+		return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);
+
+	/*
+	 * We have two proper numbers, where 'vdn' is the larger magnitude.
+	 *
+	 * Copy 'n' to 'd' before doing the arithmetic.
+	 */
+	*vdd = *vdn;
+
+	/*
+	 * Align 'm' with the result.
+	 */
+	exp_diff = vdn->exponent - vdm->exponent;
+	m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);
+
+	/*
+	 * If the signs are different, we are really subtracting.
+	 */
+	if (vdn->sign ^ vdm->sign) {
+		m_sig = vdn->significand - m_sig;
+		if ((s64)m_sig < 0) {
+			vdd->sign = vfp_sign_negate(vdd->sign);
+			m_sig = -m_sig;
+		} else if (m_sig == 0) {
+			vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
+				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
+		}
+	} else {
+		m_sig += vdn->significand;
+	}
+	vdd->significand = m_sig;
+
+	return 0;
+}
+
+static u32
+vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
+		    struct vfp_double *vdm, u32 fpscr)
+{
+	vfp_double_dump("VDN", vdn);
+	vfp_double_dump("VDM", vdm);
+
+	/*
+	 * Ensure that 'n' is the largest magnitude number.  Note that
+	 * if 'n' and 'm' have equal exponents, we do not swap them.
+	 * This ensures that NaN propagation works correctly.
+	 */
+	if (vdn->exponent < vdm->exponent) {
+		struct vfp_double *t = vdn;
+		vdn = vdm;
+		vdm = t;
+		pr_debug("VFP: swapping M <-> N\n");
+	}
+
+	vdd->sign = vdn->sign ^ vdm->sign;
+
+	/*
+	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
+	 */
+	if (vdn->exponent == 2047) {
+		if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
+			return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
+		if ((vdm->exponent | vdm->significand) == 0) {
+			*vdd = vfp_double_default_qnan;
+			return FPSCR_IOC;
+		}
+		vdd->exponent = vdn->exponent;
+		vdd->significand = 0;
+		return 0;
+	}
+
+	/*
+	 * If 'm' is zero, the result is always zero.  In this case,
+	 * 'n' may be zero or a number, but it doesn't matter which.
+	 */
+	if ((vdm->exponent | vdm->significand) == 0) {
+		vdd->exponent = 0;
+		vdd->significand = 0;
+		return 0;
+	}
+
+	/*
+	 * We add 2 to the destination exponent for the same reason
+	 * as the addition case - though this time we have +1 from
+	 * each input operand.
+	 */
+	vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
+	vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);
+
+	vfp_double_dump("VDD", vdd);
+	return 0;
+}
+
+#define NEG_MULTIPLY	(1 << 0)
+#define NEG_SUBTRACT	(1 << 1)
+
+static u32
+vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
+{
+	struct vfp_double vdd, vdp, vdn, vdm;
+	u32 exceptions;
+
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	if (vdn.exponent == 0 && vdn.significand)
+		vfp_double_normalise_denormal(&vdn);
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	if (vdm.exponent == 0 && vdm.significand)
+		vfp_double_normalise_denormal(&vdm);
+
+	exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
+	if (negate & NEG_MULTIPLY)
+		vdp.sign = vfp_sign_negate(vdp.sign);
+
+	vfp_double_unpack(&vdn, vfp_get_double(state, dd));
+	if (negate & NEG_SUBTRACT)
+		vdn.sign = vfp_sign_negate(vdn.sign);
+
+	exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, func);
+}
+
+/*
+ * Standard operations
+ */
+
+/*
+ * sd = sd + (sn * sm)
+ */
+static u32 vfp_double_fmac(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, 0, "fmac");
+}
+
+/*
+ * sd = sd - (sn * sm)
+ */
+static u32 vfp_double_fnmac(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
+}
+
+/*
+ * sd = -sd + (sn * sm)
+ */
+static u32 vfp_double_fmsc(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
+}
+
+/*
+ * sd = -sd - (sn * sm)
+ */
+static u32 vfp_double_fnmsc(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
+}
+
+/*
+ * sd = sn * sm
+ */
+static u32 vfp_double_fmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	struct vfp_double vdd, vdn, vdm;
+	u32 exceptions;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	if (vdn.exponent == 0 && vdn.significand)
+		vfp_double_normalise_denormal(&vdn);
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	if (vdm.exponent == 0 && vdm.significand)
+		vfp_double_normalise_denormal(&vdm);
+
+	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fmul");
+}
+
+/*
+ * sd = -(sn * sm)
+ */
+static u32 vfp_double_fnmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	struct vfp_double vdd, vdn, vdm;
+	u32 exceptions;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	if (vdn.exponent == 0 && vdn.significand)
+		vfp_double_normalise_denormal(&vdn);
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	if (vdm.exponent == 0 && vdm.significand)
+		vfp_double_normalise_denormal(&vdm);
+
+	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
+	vdd.sign = vfp_sign_negate(vdd.sign);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fnmul");
+}
+
+/*
+ * sd = sn + sm
+ */
+static u32 vfp_double_fadd(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	struct vfp_double vdd, vdn, vdm;
+	u32 exceptions;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	if (vdn.exponent == 0 && vdn.significand)
+		vfp_double_normalise_denormal(&vdn);
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	if (vdm.exponent == 0 && vdm.significand)
+		vfp_double_normalise_denormal(&vdm);
+
+	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fadd");
+}
+
+/*
+ * sd = sn - sm
+ */
+static u32 vfp_double_fsub(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	struct vfp_double vdd, vdn, vdm;
+	u32 exceptions;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	if (vdn.exponent == 0 && vdn.significand)
+		vfp_double_normalise_denormal(&vdn);
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	if (vdm.exponent == 0 && vdm.significand)
+		vfp_double_normalise_denormal(&vdm);
+
+	/*
+	 * Subtraction is like addition, but with a negated operand.
+	 */
+	vdm.sign = vfp_sign_negate(vdm.sign);
+
+	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fsub");
+}
+
+/*
+ * sd = sn / sm
+ */
+static u32 vfp_double_fdiv(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	struct vfp_double vdd, vdn, vdm;
+	u32 exceptions = 0;
+	int tm, tn;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+
+	vdd.sign = vdn.sign ^ vdm.sign;
+
+	tn = vfp_double_type(&vdn);
+	tm = vfp_double_type(&vdm);
+
+	/*
+	 * Is n a NAN?
+	 */
+	if (tn & VFP_NAN)
+		goto vdn_nan;
+
+	/*
+	 * Is m a NAN?
+	 */
+	if (tm & VFP_NAN)
+		goto vdm_nan;
+
+	/*
+	 * If n and m are infinity, the result is invalid
+	 * If n and m are zero, the result is invalid
+	 */
+	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
+		goto invalid;
+
+	/*
+	 * If n is infinity, the result is infinity
+	 */
+	if (tn & VFP_INFINITY)
+		goto infinity;
+
+	/*
+	 * If m is zero, raise div0 exceptions
+	 */
+	if (tm & VFP_ZERO)
+		goto divzero;
+
+	/*
+	 * If m is infinity, or n is zero, the result is zero
+	 */
+	if (tm & VFP_INFINITY || tn & VFP_ZERO)
+		goto zero;
+
+	if (tn & VFP_DENORMAL)
+		vfp_double_normalise_denormal(&vdn);
+	if (tm & VFP_DENORMAL)
+		vfp_double_normalise_denormal(&vdm);
+
+	/*
+	 * Ok, we have two numbers, we can perform division.
+	 */	
+	vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
+	vdm.significand <<= 1;
+	if (vdm.significand <= (2 * vdn.significand)) {
+		vdn.significand >>= 1;
+		vdd.exponent++;
+	}
+	vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
+	if ((vdd.significand & 0x1ff) <= 2) {
+		u64 termh, terml, remh, reml;
+		mul64to128(&termh, &terml, vdm.significand, vdd.significand);
+		sub128(&remh, &reml, vdn.significand, 0, termh, terml);
+		while ((s64)remh < 0) {
+			vdd.significand -= 1;
+			add128(&remh, &reml, remh, reml, 0, vdm.significand);
+		}
+		vdd.significand |= (reml != 0);
+	}
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, 0, "fdiv");
+
+ vdn_nan:
+	exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
+ pack:
+	vfp_put_double(state, vfp_double_pack(&vdd), dd);
+	return exceptions;
+
+ vdm_nan:
+	exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
+	goto pack;
+
+ zero:
+	vdd.exponent = 0;
+	vdd.significand = 0;
+	goto pack;
+
+ divzero:
+	exceptions = FPSCR_DZC;
+ infinity:
+	vdd.exponent = 2047;
+	vdd.significand = 0;
+	goto pack;
+
+ invalid:
+	vfp_put_double(state, vfp_double_pack(&vfp_double_default_qnan), dd);
+	return FPSCR_IOC;
+}
+
+static struct op fops[] = {
+	{ vfp_double_fmac,  0 },
+	{ vfp_double_fmsc,  0 },
+	{ vfp_double_fmul,  0 },
+	{ vfp_double_fadd,  0 },
+	{ vfp_double_fnmac, 0 },
+	{ vfp_double_fnmsc, 0 },
+	{ vfp_double_fnmul, 0 },
+	{ vfp_double_fsub,  0 },
+	{ vfp_double_fdiv,  0 },
+};
+
+#define FREG_BANK(x)	((x) & 0x0c)
+#define FREG_IDX(x)	((x) & 3)
+
+u32 vfp_double_cpdo(ARMul_State* state, u32 inst, u32 fpscr)
+{
+	u32 op = inst & FOP_MASK;
+	u32 exceptions = 0;
+	unsigned int dest;
+	unsigned int dn = vfp_get_dn(inst);
+	unsigned int dm;
+	unsigned int vecitr, veclen, vecstride;
+	struct op *fop;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK));
+
+	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
+
+	/*
+	 * fcvtds takes an sN register number as destination, not dN.
+	 * It also always operates on scalars.
+	 */
+	if (fop->flags & OP_SD)
+		dest = vfp_get_sd(inst);
+	else
+		dest = vfp_get_dd(inst);
+
+	/*
+	 * f[us]ito takes a sN operand, not a dN operand.
+	 */
+	if (fop->flags & OP_SM)
+		dm = vfp_get_sm(inst);
+	else
+		dm = vfp_get_dm(inst);
+
+	/*
+	 * If destination bank is zero, vector length is always '1'.
+	 * ARM DDI0100F C5.1.3, C5.3.2.
+	 */
+	if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
+		veclen = 0;
+	else
+		veclen = fpscr & FPSCR_LENGTH_MASK;
+
+	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
+		 (veclen >> FPSCR_LENGTH_BIT) + 1);
+
+	if (!fop->fn) {
+		printf("VFP: could not find double op %d\n", FEXT_TO_IDX(inst));
+		goto invalid;
+	}
+
+	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
+		u32 except;
+		char type;
+
+		type = fop->flags & OP_SD ? 's' : 'd';
+		if (op == FOP_EXT)
+			pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n",
+				 vecitr >> FPSCR_LENGTH_BIT,
+				 type, dest, dn, dm);
+		else
+			pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n",
+				 vecitr >> FPSCR_LENGTH_BIT,
+				 type, dest, dn, FOP_TO_IDX(op), dm);
+
+		except = fop->fn(state, dest, dn, dm, fpscr);
+		pr_debug("VFP: itr%d: exceptions=%08x\n",
+			 vecitr >> FPSCR_LENGTH_BIT, except);
+
+		exceptions |= except;
+
+		/*
+		 * CHECK: It appears to be undefined whether we stop when
+		 * we encounter an exception.  We continue.
+		 */
+		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3);
+		dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3);
+		if (FREG_BANK(dm) != 0)
+			dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3);
+	}
+	return exceptions;
+
+ invalid:
+	return ~0;
+}
diff --git a/src/core/arm/interpreter/vfp/vfpinstr.cpp b/src/core/arm/interpreter/vfp/vfpinstr.cpp
new file mode 100644
index 000000000..a57047911
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfpinstr.cpp
@@ -0,0 +1,5123 @@
+/*
+    vfp/vfpinstr.c - ARM VFPv3 emulation unit - Individual instructions data
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/* Notice: this file should not be compiled as is, and is meant to be
+   included in other files only. */
+
+/* ----------------------------------------------------------------------- */
+/* CDP instructions */
+/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
+
+/* ----------------------------------------------------------------------- */
+/* VMLA */
+/* cond 1110 0D00 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vmla
+#define vfpinstr_inst 	vmla_inst
+#define VFPLABEL_INST 	VMLA_INST
+#ifdef VFP_DECODE
+{"vmla",        4,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x0,    9, 11, 0x5,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmla",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmla_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VMLA :\n");
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0 && (OPC_2 & 0x2) == 0)
+{
+	DBG("VMLA :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int add = (BIT(6) == 0);
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG32(tmp);
+		mm = FR32(d);
+		tmp = FPADD(mm,tmp);
+		//LETS(d,tmp);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+		tmp = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(tmp);
+		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+		nn = OR(SHL(nn,CONST64(32)),tmp);
+		nn = FPBITCAST64(nn);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG64(tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+		mm = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(mm);
+		tmp = FPADD(mm,tmp);
+		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
+		LETFPS(2*d ,FPBITCAST32(nn));
+		LETFPS(d*2 + 1 , FPBITCAST32(mm));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VNMLS */
+/* cond 1110 0D00 Vn-- Vd-- 101X N1M0 Vm-- */
+#define vfpinstr 	vmls
+#define vfpinstr_inst 	vmls_inst
+#define VFPLABEL_INST 	VMLS_INST
+#ifdef VFP_DECODE
+{"vmls",        7,      ARMVFP2,    28 , 31, 0xF, 25, 27, 0x1,   23, 23, 1,  11, 11, 0,  8, 9, 0x2,  6, 6, 1,  4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmls",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmls_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VMLS :\n");
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VMLS :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s VMLS instruction is executed out of here.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int add = (BIT(6) == 0);
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG32(tmp);
+		mm = FR32(d);
+		tmp = FPADD(mm,tmp);
+		//LETS(d,tmp);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+		tmp = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(tmp);
+		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+		nn = OR(SHL(nn,CONST64(32)),tmp);
+		nn = FPBITCAST64(nn);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG64(tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+		mm = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(mm);
+		tmp = FPADD(mm,tmp);
+		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
+		LETFPS(2*d ,FPBITCAST32(nn));
+		LETFPS(d*2 + 1 , FPBITCAST32(mm));
+	}	
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VNMLA */
+/* cond 1110 0D01 Vn-- Vd-- 101X N1M0 Vm-- */
+#define vfpinstr 	vnmla
+#define vfpinstr_inst 	vnmla_inst
+#define VFPLABEL_INST 	VNMLA_INST
+#ifdef VFP_DECODE
+//{"vnmla",       5,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x0,    9, 11, 0x5,     6, 6, 1,        4, 4, 0},
+{"vnmla",       4,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x1,    9, 11, 0x5,     4, 4, 0},
+{"vnmla",       5,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x2,    9, 11, 0x5,     6, 6, 1,  4, 4, 0},
+//{"vnmla",       5,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x2,    9, 11, 0x5,     6, 6, 1,        4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vnmla",       0,      ARMVFP2, 0},
+{"vnmla",       0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vnmla_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VNMLA :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 1 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VNMLA :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s VNMLA instruction is executed out of here.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int add = (BIT(6) == 0);
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG32(tmp);
+		mm = FR32(d);
+		tmp = FPADD(FPNEG32(mm),tmp);
+		//LETS(d,tmp);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+		tmp = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(tmp);
+		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+		nn = OR(SHL(nn,CONST64(32)),tmp);
+		nn = FPBITCAST64(nn);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG64(tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+		mm = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(mm);
+		tmp = FPADD(FPNEG64(mm),tmp);
+		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));	
+		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));
+		LETFPS(2*d ,FPBITCAST32(nn));
+		LETFPS(d*2 + 1 , FPBITCAST32(mm));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VNMLS */
+/* cond 1110 0D01 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vnmls
+#define vfpinstr_inst 	vnmls_inst
+#define VFPLABEL_INST 	VNMLS_INST
+#ifdef VFP_DECODE
+{"vnmls",       5,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x1,    9, 11, 0x5,     6, 6, 0,        4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vnmls",       0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vnmls_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VNMLS :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 1 && (OPC_2 & 0x2) == 0)
+{
+	DBG("VNMLS :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int add = (BIT(6) == 0);
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG32(tmp);
+		mm = FR32(d);
+		tmp = FPADD(FPNEG32(mm),tmp);
+		//LETS(d,tmp);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+		tmp = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(tmp);
+		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+		nn = OR(SHL(nn,CONST64(32)),tmp);
+		nn = FPBITCAST64(nn);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG64(tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+		mm = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(mm);
+		tmp = FPADD(FPNEG64(mm),tmp);
+		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
+		LETFPS(2*d ,FPBITCAST32(nn));
+		LETFPS(d*2 + 1 , FPBITCAST32(mm));
+	}	
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VNMUL */
+/* cond 1110 0D10 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vnmul
+#define vfpinstr_inst 	vnmul_inst
+#define VFPLABEL_INST 	VNMUL_INST
+#ifdef VFP_DECODE
+{"vnmul",       5,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x2,    9, 11, 0x5,     6, 6, 1,        4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vnmul",       0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vnmul_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VNMUL :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 2 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VNMUL :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}		
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int add = (BIT(6) == 0);
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		//LETS(d,tmp);
+		LETFPS(d,FPNEG32(tmp));
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+		tmp = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(tmp);
+		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+		nn = OR(SHL(nn,CONST64(32)),tmp);
+		nn = FPBITCAST64(nn);
+		tmp = FPMUL(nn,mm);
+		tmp = FPNEG64(tmp);
+		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
+		LETFPS(2*d ,FPBITCAST32(nn));
+		LETFPS(d*2 + 1 , FPBITCAST32(mm));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMUL */
+/* cond 1110 0D10 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vmul
+#define vfpinstr_inst 	vmul_inst
+#define VFPLABEL_INST 	VMUL_INST
+#ifdef VFP_DECODE
+{"vmul",        5,      ARMVFP2,        23, 27, 0x1c,  20, 21, 0x2,     9, 11, 0x5,      6, 6, 0,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmul",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmul_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VMUL :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 2 && (OPC_2 & 0x2) == 0)
+{
+	DBG("VMUL :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//printf("\n\n\t\tin %s instruction is executed out.\n\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		//mm = SITOFP(32,FR(m));
+		//nn = SITOFP(32,FRn));
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		//LETS(d,tmp);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value* m0 = FPBITCAST64(v64);
+		lo = FR32(2 * n);
+		hi = FR32(2 * n + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		hi64 = ZEXT64(hi);
+		lo64 = ZEXT64(lo);
+		v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value *n0 = FPBITCAST64(v64); 
+		tmp = FPMUL(n0,m0);
+		Value *val64 = IBITCAST64(tmp);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VADD */
+/* cond 1110 0D11 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vadd
+#define vfpinstr_inst 	vadd_inst
+#define VFPLABEL_INST 	VADD_INST
+#ifdef VFP_DECODE
+{"vadd",        5,      ARMVFP2,        23, 27, 0x1c,  20, 21, 0x3,     9, 11, 0x5,      6, 6, 0,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vadd",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vadd_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VADD :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 3 && (OPC_2 & 0x2) == 0)
+{
+	DBG("VADD :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction will implement out of JIT.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPADD(nn,mm);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value* m0 = FPBITCAST64(v64);
+		lo = FR32(2 * n);
+		hi = FR32(2 * n + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		hi64 = ZEXT64(hi);
+		lo64 = ZEXT64(lo);
+		v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value *n0 = FPBITCAST64(v64); 
+		tmp = FPADD(n0,m0);
+		Value *val64 = IBITCAST64(tmp);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VSUB */
+/* cond 1110 0D11 Vn-- Vd-- 101X N1M0 Vm-- */
+#define vfpinstr 	vsub
+#define vfpinstr_inst 	vsub_inst
+#define VFPLABEL_INST 	VSUB_INST
+#ifdef VFP_DECODE
+{"vsub",        5,      ARMVFP2,        23, 27, 0x1c,  20, 21, 0x3,     9, 11, 0x5,      6, 6, 1,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vsub",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vsub_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VSUB :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 3 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VSUB :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instr=0x%x, instruction is executed out of JIT.\n", __FUNCTION__, instr);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPSUB(nn,mm);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value* m0 = FPBITCAST64(v64);
+		lo = FR32(2 * n);
+		hi = FR32(2 * n + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		hi64 = ZEXT64(hi);
+		lo64 = ZEXT64(lo);
+		v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value *n0 = FPBITCAST64(v64); 
+		tmp = FPSUB(n0,m0);
+		Value *val64 = IBITCAST64(tmp);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	} 
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VDIV */
+/* cond 1110 1D00 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vdiv
+#define vfpinstr_inst 	vdiv_inst
+#define VFPLABEL_INST 	VDIV_INST
+#ifdef VFP_DECODE
+{"vdiv",        5,      ARMVFP2,        23, 27, 0x1d,  20, 21, 0x0,     9, 11, 0x5,      6, 6, 0,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vdiv",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vdiv_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VDIV :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xA && (OPC_2 & 0x2) == 0)
+{
+	DBG("VDIV :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPDIV(nn,mm);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value* m0 = FPBITCAST64(v64);
+		lo = FR32(2 * n);
+		hi = FR32(2 * n + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		hi64 = ZEXT64(hi);
+		lo64 = ZEXT64(lo);
+		v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value *n0 = FPBITCAST64(v64); 
+		tmp = FPDIV(n0,m0);
+		Value *val64 = IBITCAST64(tmp);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	} 		
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMOVI move immediate */
+/* cond 1110 1D11 im4H Vd-- 101X 0000 im4L */
+/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
+#define vfpinstr 	vmovi
+#define vfpinstr_inst 	vmovi_inst
+#define VFPLABEL_INST 	VMOVI_INST
+#ifdef VFP_DECODE
+{"vmov(i)",       4,      ARMVFP3,        23, 27, 0x1d,   20, 21, 0x3,    9, 11, 0x5,     4, 7, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmov(i)",       0,      ARMVFP3, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovi_inst {
+	unsigned int single;
+	unsigned int d;
+	unsigned int imm;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+	
+	inst_cream->single   = BIT(inst, 8) == 0;
+	inst_cream->d        = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4);
+	unsigned int imm8 = BITS(inst, 16, 19) << 4 | BITS(inst, 0, 3);
+	if (inst_cream->single)
+		inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0x1f : 0)<<25 | BITS(imm8, 0, 5)<<19;
+	else
+		inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0xff : 0)<<22 | BITS(imm8, 0, 5)<<16;
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		VMOVI(cpu, inst_cream->single, inst_cream->d, inst_cream->imm);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ( (OPC_1 & 0xb) == 0xb && BITS(4, 7) == 0)
+{
+	unsigned int single   = BIT(8) == 0;
+	unsigned int d        = (single ? BITS(12,15)<<1 | BIT(22) : BITS(12,15) | BIT(22)<<4);
+	unsigned int imm;
+	instr = BITS(16, 19) << 4 | BITS(0, 3); /* FIXME dirty workaround to get a correct imm */
+	if (single) {
+		imm = BIT(7)<<31 | (BIT(6)==0)<<30 | (BIT(6) ? 0x1f : 0)<<25 | BITS(0, 5)<<19;
+	} else {
+		imm = BIT(7)<<31 | (BIT(6)==0)<<30 | (BIT(6) ? 0xff : 0)<<22 | BITS(0, 5)<<16;
+	}
+	VMOVI(state, single, d, imm);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_CDP_IMPL
+void VMOVI(ARMul_State * state, ARMword single, ARMword d, ARMword imm)
+{
+	DBG("VMOV(I) :\n");
+		
+	if (single)
+	{
+		DBG("\ts%d <= [%x]\n", d, imm);
+		state->ExtReg[d] = imm;
+	}
+	else
+	{
+		/* Check endian please */
+		DBG("\ts[%d-%d] <= [%x-%x]\n", d*2+1, d*2, imm, 0);
+		state->ExtReg[d*2+1] = imm;
+		state->ExtReg[d*2] = 0;
+	}
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int single = (BIT(8) == 0);
+	int d;
+	int imm32;
+	Value *v;
+	Value *tmp;
+	v = CONST32(BITS(0,3) | BITS(16,19) << 4);
+	//v = CONST64(0x3ff0000000000000);
+	if(single){
+		d = BIT(22) | BITS(12,15) << 1;
+	}else {
+		d = BITS(12,15) | BIT(22) << 4;
+	}
+	if(single){
+		LETFPS(d,FPBITCAST32(v));
+	}else {
+		//v = UITOFP(64,v);
+		//tmp = IBITCAST64(v);
+		LETFPS(d*2 ,FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff)))));
+		LETFPS(d * 2 + 1,FPBITCAST32(TRUNC32(LSHR(v,CONST64(32)))));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMOVR move register */
+/* cond 1110 1D11 0000 Vd-- 101X 01M0 Vm-- */
+/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
+#define vfpinstr 	vmovr
+#define vfpinstr_inst 	vmovr_inst
+#define VFPLABEL_INST 	VMOVR_INST
+#ifdef VFP_DECODE
+{"vmov(r)",       5,      ARMVFP3,        23, 27, 0x1d,   16, 21, 0x30,    9, 11, 0x5,    6, 7, 1,        4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmov(r)",       0,      ARMVFP3, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovr_inst {
+	unsigned int single;
+	unsigned int d;
+	unsigned int m;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	VFP_DEBUG_UNTESTED(VMOVR);
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+	
+	inst_cream->single   = BIT(inst, 8) == 0;
+	inst_cream->d        = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4);
+	inst_cream->m        = (inst_cream->single ? BITS(inst, 0, 3)<<1 | BIT(inst, 5) : BITS(inst, 0, 3) | BIT(inst, 5)<<4);
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		VMOVR(cpu, inst_cream->single, inst_cream->d, inst_cream->m);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ( (OPC_1 & 0xb) == 0xb && CRn == 0 && (OPC_2 & 0x6) == 0x2 )
+{
+	unsigned int single   = BIT(8) == 0;
+	unsigned int d        = (single ? BITS(12,15)<<1 | BIT(22) : BITS(12,15) | BIT(22)<<4);
+	unsigned int m        = (single ? BITS( 0, 3)<<1 | BIT( 5) : BITS( 0, 3) | BIT( 5)<<4);;
+	VMOVR(state, single, d, m);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_CDP_IMPL
+void VMOVR(ARMul_State * state, ARMword single, ARMword d, ARMword m)
+{
+	DBG("VMOV(R) :\n");
+		
+	if (single)
+	{
+		DBG("\ts%d <= s%d[%x]\n", d, m, state->ExtReg[m]);
+		state->ExtReg[d] = state->ExtReg[m];
+	}
+	else
+	{
+		/* Check endian please */
+		DBG("\ts[%d-%d] <= s[%d-%d][%x-%x]\n", d*2+1, d*2, m*2+1, m*2, state->ExtReg[m*2+1], state->ExtReg[m*2]);
+		state->ExtReg[d*2+1] = state->ExtReg[m*2+1];
+		state->ExtReg[d*2] = state->ExtReg[m*2];
+	}
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s VMOV \n", __FUNCTION__);
+	int single   = BIT(8) == 0;
+	int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
+	int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
+
+	if (single)
+	{
+		LETFPS(d, FR32(m));
+	}
+	else
+	{
+		/* Check endian please */
+		LETFPS((d*2 + 1), FR32(m*2 + 1));
+		LETFPS((d * 2), FR32(m * 2));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VABS */
+/* cond 1110 1D11 0000 Vd-- 101X 11M0 Vm-- */
+#define vfpinstr 	vabs
+#define vfpinstr_inst 	vabs_inst
+#define VFPLABEL_INST 	VABS_INST
+#ifdef VFP_DECODE
+{"vabs",        5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x30,    9, 11, 0x5,      6, 7, 3,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vabs",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vabs_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VABS);
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VABS :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 0 && (OPC_2 & 0x7) == 6)
+{
+	DBG("VABS :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int single   = BIT(8) == 0;
+	int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
+	int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
+	Value* m0;
+	if (single)
+	{
+		m0 =  FR32(m);
+		m0 = SELECT(FPCMP_OLT(m0,FPCONST32(0.0)),FPNEG32(m0),m0);
+		LETFPS(d,m0);
+	}
+	else
+	{
+		/* Check endian please */
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		m0 = FPBITCAST64(v64);
+		m0 = SELECT(FPCMP_OLT(m0,FPCONST64(0.0)),FPNEG64(m0),m0);
+		Value *val64 = IBITCAST64(m0);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VNEG */
+/* cond 1110 1D11 0001 Vd-- 101X 11M0 Vm-- */
+#define vfpinstr 	vneg
+#define vfpinstr_inst 	vneg_inst
+#define VFPLABEL_INST 	VNEG_INST
+#ifdef VFP_DECODE
+//{"vneg",        5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x30,    9, 11, 0x5,      6, 7, 1,     4, 4, 0},
+{"vneg",        5,      ARMVFP2,        23, 27, 0x1d,  17, 21, 0x18,    9, 11, 0x5,      6, 7, 1,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vneg",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vneg_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VNEG);
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+
+		DBG("VNEG :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 1 && (OPC_2 & 0x7) == 2)
+{
+	DBG("VNEG :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int single   = BIT(8) == 0;
+	int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
+	int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
+	Value* m0;
+	if (single)
+	{
+		m0 =  FR32(m);
+		m0 = FPNEG32(m0);
+		LETFPS(d,m0);
+	}
+	else
+	{
+		/* Check endian please */
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		m0 = FPBITCAST64(v64);
+		m0 = FPNEG64(m0);
+		Value *val64 = IBITCAST64(m0);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VSQRT */
+/* cond 1110 1D11 0001 Vd-- 101X 11M0 Vm-- */
+#define vfpinstr 	vsqrt
+#define vfpinstr_inst 	vsqrt_inst
+#define VFPLABEL_INST 	VSQRT_INST
+#ifdef VFP_DECODE
+{"vsqrt",        5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x31,    9, 11, 0x5,      6, 7, 3,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vsqrt",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vsqrt_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VSQRT :\n");
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 1 && (OPC_2 & 0x7) == 6)
+{
+	DBG("VSQRT :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int dp_op = (BIT(8) == 1);
+	int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
+	int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+	Value* v;
+	Value* tmp;
+	if(dp_op){
+		v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
+		tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
+		v = OR(v,tmp);
+		v = FPSQRT(FPBITCAST64(v));
+		tmp = TRUNC32(LSHR(IBITCAST64(v),CONST64(32)));
+		v = TRUNC32(AND(IBITCAST64(v),CONST64( 0xffffffff)));		
+		LETFPS(2 * d , FPBITCAST32(v));
+		LETFPS(2 * d + 1, FPBITCAST32(tmp));
+	}else {
+		v = FR32(m);
+		v = FPSQRT(FPEXT(64,v));
+		v = FPTRUNC(32,v);
+		LETFPS(d,v);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VCMP VCMPE */
+/* cond 1110 1D11 0100 Vd-- 101X E1M0 Vm-- Encoding 1 */
+#define vfpinstr 	vcmp
+#define vfpinstr_inst 	vcmp_inst
+#define VFPLABEL_INST 	VCMP_INST
+#ifdef VFP_DECODE
+{"vcmp",        5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x34,    9, 11, 0x5,      6, 6, 1,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vcmp",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vcmp_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+
+		DBG("VCMP(1) :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 4 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VCMP(1) :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is executed out of JIT.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int dp_op = (BIT(8) == 1);
+	int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
+	int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+	Value* v;
+	Value* tmp;
+	Value* n;
+	Value* z;
+	Value* c;
+	Value* vt;
+	Value* v1;
+	Value* nzcv;
+	if(dp_op){
+		v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
+		tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
+		v1 = OR(v,tmp);
+		v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32));
+		tmp = ZEXT64(IBITCAST32(FR32(2 * d)));
+		v = OR(v,tmp);
+		z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1));
+		n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1));
+		c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1)); 
+		tmp =  FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1));
+		v1 = tmp;
+		c = OR(c,tmp);
+		n = SHL(ZEXT32(n),CONST32(31));
+		z = SHL(ZEXT32(z),CONST32(30));
+		c = SHL(ZEXT32(c),CONST32(29));
+		v1 = SHL(ZEXT32(v1),CONST(28));
+		nzcv = OR(OR(OR(n,z),c),v1);	
+		v = R(VFP_FPSCR);
+		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+		LET(VFP_FPSCR,tmp);
+	}else {
+		z = FPCMP_OEQ(FR32(d),FR32(m));
+		n = FPCMP_OLT(FR32(d),FR32(m));
+		c = FPCMP_OGE(FR32(d),FR32(m)); 
+		tmp = FPCMP_UNO(FR32(d),FR32(m));
+		c = OR(c,tmp);
+		v1 = tmp;
+		n = SHL(ZEXT32(n),CONST32(31));
+		z = SHL(ZEXT32(z),CONST32(30));
+		c = SHL(ZEXT32(c),CONST32(29));
+		v1 = SHL(ZEXT32(v1),CONST(28));
+		nzcv = OR(OR(OR(n,z),c),v1);	
+		v = R(VFP_FPSCR);
+		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+		LET(VFP_FPSCR,tmp);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VCMP VCMPE */
+/* cond 1110 1D11 0100 Vd-- 101X E1M0 Vm-- Encoding 2 */
+#define vfpinstr 	vcmp2
+#define vfpinstr_inst 	vcmp2_inst
+#define VFPLABEL_INST 	VCMP2_INST
+#ifdef VFP_DECODE
+{"vcmp2",        5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x35,    9, 11, 0x5,     0, 6, 0x40},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vcmp2",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vcmp2_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VCMP(2) :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 5 && (OPC_2 & 0x2) == 2 && CRm == 0)
+{
+	DBG("VCMP(2) :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction will executed out of JIT.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int dp_op = (BIT(8) == 1);
+	int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
+	//int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+	Value* v;
+	Value* tmp;
+	Value* n;
+	Value* z;
+	Value* c;
+	Value* vt;
+	Value* v1;
+	Value* nzcv;
+	if(dp_op){
+		v1 = CONST64(0);
+		v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32));
+		tmp = ZEXT64(IBITCAST32(FR32(2 * d)));
+		v = OR(v,tmp);
+		z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1));
+		n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1));
+		c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1)); 
+		tmp =  FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1));
+		v1 = tmp;
+		c = OR(c,tmp);
+		n = SHL(ZEXT32(n),CONST32(31));
+		z = SHL(ZEXT32(z),CONST32(30));
+		c = SHL(ZEXT32(c),CONST32(29));
+		v1 = SHL(ZEXT32(v1),CONST(28));
+		nzcv = OR(OR(OR(n,z),c),v1);	
+		v = R(VFP_FPSCR);
+		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+		LET(VFP_FPSCR,tmp);
+	}else {
+		v1 = CONST(0);
+		v1 = FPBITCAST32(v1);
+		z = FPCMP_OEQ(FR32(d),v1);
+		n = FPCMP_OLT(FR32(d),v1);
+		c = FPCMP_OGE(FR32(d),v1); 
+		tmp = FPCMP_UNO(FR32(d),v1);
+		c = OR(c,tmp);
+		v1 = tmp;
+		n = SHL(ZEXT32(n),CONST32(31));
+		z = SHL(ZEXT32(z),CONST32(30));
+		c = SHL(ZEXT32(c),CONST32(29));
+		v1 = SHL(ZEXT32(v1),CONST(28));
+		nzcv = OR(OR(OR(n,z),c),v1);	
+		v = R(VFP_FPSCR);
+		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+		LET(VFP_FPSCR,tmp);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VCVTBDS between double and single */
+/* cond 1110 1D11 0111 Vd-- 101X 11M0 Vm-- */
+#define vfpinstr 	vcvtbds
+#define vfpinstr_inst 	vcvtbds_inst
+#define VFPLABEL_INST 	VCVTBDS_INST
+#ifdef VFP_DECODE
+{"vcvt(bds)",   5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x37,    9, 11, 0x5,      6, 7, 3,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vcvt(bds)",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vcvtbds_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VCVT(BDS) :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 7 && (OPC_2 & 0x6) == 6)
+{
+	DBG("VCVT(BDS) :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is executed out.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int dp_op = (BIT(8) == 1);
+	int d = dp_op ? BITS(12,15) << 1 | BIT(22) : BIT(22) << 4 | BITS(12,15);
+	int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+	int d2s = dp_op;
+	Value* v;
+	Value* tmp;
+	Value* v1;
+	if(d2s){
+		v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
+		tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
+		v1 = OR(v,tmp);
+		tmp = FPTRUNC(32,FPBITCAST64(v1));
+		LETFPS(d,tmp);	
+	}else {
+		v = FR32(m);
+		tmp = FPEXT(64,v);
+		v = IBITCAST64(tmp);
+		tmp = TRUNC32(AND(v,CONST64(0xffffffff)));
+		v1 = TRUNC32(LSHR(v,CONST64(32)));
+		LETFPS(2 * d, FPBITCAST32(tmp) );
+		LETFPS(2 * d + 1, FPBITCAST32(v1));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VCVTBFF between floating point and fixed point */
+/* cond 1110 1D11 1op2 Vd-- 101X X1M0 Vm-- */
+#define vfpinstr 	vcvtbff
+#define vfpinstr_inst 	vcvtbff_inst
+#define VFPLABEL_INST 	VCVTBFF_INST
+#ifdef VFP_DECODE
+{"vcvt(bff)",   6,      ARMVFP3,        23, 27, 0x1d,  19, 21, 0x7,     17, 17, 0x1,      9, 11, 0x5,  	6, 6, 1},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vcvt(bff)",   0,      ARMVFP3,         4, 4, 1},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vcvtbff_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VCVTBFF);
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VCVT(BFF) :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn >= 0xA && (OPC_2 & 0x2) == 2)
+{
+	DBG("VCVT(BFF) :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arch_arm_undef(cpu, bb, instr);
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VCVTBFI between floating point and integer */
+/* cond 1110 1D11 1op2 Vd-- 101X X1M0 Vm-- */
+#define vfpinstr 	vcvtbfi
+#define vfpinstr_inst 	vcvtbfi_inst
+#define VFPLABEL_INST 	VCVTBFI_INST
+#ifdef VFP_DECODE
+{"vcvt(bfi)",   5,      ARMVFP2,        23, 27, 0x1d,  19, 21, 0x7,     9, 11, 0x5,      6, 6, 1,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vcvt(bfi)",   0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vcvtbfi_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VCVT(BFI) :\n");
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn > 7 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VCVT(BFI) :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	unsigned int opc2 = BITS(16,18);
+	int to_integer = ((opc2 >> 2) == 1);	
+	int dp_op =  (BIT(8) == 1);
+	unsigned int op = BIT(7);
+	int m,d;
+	Value* v;
+	Value* hi;
+	Value* lo;
+	Value* v64; 
+	if(to_integer){
+		d = BIT(22) | (BITS(12,15) << 1);
+		if(dp_op)
+			m = BITS(0,3) | BIT(5) << 4;
+		else
+			m = BIT(5) | BITS(0,3) << 1;
+	}else {
+		m = BIT(5) | BITS(0,3) << 1;
+		if(dp_op)
+			d = BITS(12,15) | BIT(22) << 4;
+ 		else
+			d  = BIT(22) | BITS(12,15) << 1;		
+	}
+	if(to_integer){
+		if(dp_op){
+			lo = FR32(m * 2);
+		        hi = FR32(m * 2 + 1);	
+			hi = ZEXT64(IBITCAST32(hi));
+			lo = ZEXT64(IBITCAST32(lo));
+			v64 = OR(SHL(hi,CONST64(32)),lo);	
+			if(BIT(16)){
+				v = FPTOSI(32,FPBITCAST64(v64));
+			}
+			else
+				v = FPTOUI(32,FPBITCAST64(v64));
+				
+				v = FPBITCAST32(v);
+				LETFPS(d,v);
+		}else {
+			v = FR32(m);
+			if(BIT(16)){
+				
+				v = FPTOSI(32,v);
+			}
+			else
+				v = FPTOUI(32,v);
+				LETFPS(d,FPBITCAST32(v));
+		}
+	}else {
+		if(dp_op){	
+			v = IBITCAST32(FR32(m));
+			if(BIT(7))
+				v64 = SITOFP(64,v); 
+			else
+				v64 = UITOFP(64,v);
+			v = IBITCAST64(v64);
+			hi = FPBITCAST32(TRUNC32(LSHR(v,CONST64(32))));
+			lo = FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff))));
+			LETFPS(2 * d , lo);
+			LETFPS(2 * d + 1, hi);
+		}else {
+			v = IBITCAST32(FR32(m));
+			if(BIT(7))
+				v = SITOFP(32,v);
+			else
+				v = UITOFP(32,v);
+				LETFPS(d,v);
+		}
+	}
+	return No_exp;
+}
+
+/**
+* @brief The implementation of c language for vcvtbfi instruction of dyncom
+*
+* @param cpu
+* @param instr
+*
+* @return 
+*/
+int vcvtbfi_instr_impl(arm_core_t* cpu, uint32 instr){
+	int dp_operation = BIT(8);
+	int ret;
+	if (dp_operation)
+		ret = vfp_double_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+	else
+		ret = vfp_single_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+	vfp_raise_exceptions(cpu, ret, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+	return 0;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* MRC / MCR instructions */
+/* cond 1110 AAAL XXXX XXXX 101C XBB1 XXXX */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- */
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRS between register and single precision */
+/* cond 1110 000o Vn-- Rt-- 1010 N001 0000 */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */
+#define vfpinstr 	vmovbrs
+#define vfpinstr_inst 	vmovbrs_inst
+#define VFPLABEL_INST 	VMOVBRS_INST
+#ifdef VFP_DECODE
+{"vmovbrs",    3,    ARMVFP2,    21, 27, 0x70,    8, 11, 0xA,    0, 6, 0x10},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmovbrs",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovbrs_inst {
+	unsigned int to_arm;
+	unsigned int t;
+	unsigned int n;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->to_arm   = BIT(inst, 20) == 1;
+	inst_cream->t        = BITS(inst, 12, 15);
+	inst_cream->n        = BIT(inst, 7) | BITS(inst, 16, 19)<<1;
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+           
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		VMOVBRS(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->n, &(cpu->Reg[inst_cream->t]));
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MRC_TRANS
+if (OPC_1 == 0x0 && CRm == 0 && (OPC_2 & 0x3) == 0)
+{
+	/* VMOV r to s */
+	/* Transfering Rt is not mandatory, as the value of interest is pointed by value */
+	VMOVBRS(state, BIT(20), Rt, BIT(7)|CRn<<1, value);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MCR_TRANS
+if (OPC_1 == 0x0 && CRm == 0 && (OPC_2 & 0x3) == 0)
+{
+	/* VMOV s to r */
+	/* Transfering Rt is not mandatory, as the value of interest is pointed by value */
+	VMOVBRS(state, BIT(20), Rt, BIT(7)|CRn<<1, &value);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MRC_IMPL
+void VMOVBRS(ARMul_State * state, ARMword to_arm, ARMword t, ARMword n, ARMword *value)
+{
+	DBG("VMOV(BRS) :\n");
+	if (to_arm)
+	{
+		DBG("\tr%d <= s%d=[%x]\n", t, n, state->ExtReg[n]);
+		*value = state->ExtReg[n];
+	}
+	else
+	{
+		DBG("\ts%d <= r%d=[%x]\n", n, t, *value);
+		state->ExtReg[n] = *value;
+	}
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("VMOV(BRS) :\n");
+	int to_arm   = BIT(20) == 1;
+	int t        = BITS(12, 15);
+	int n        = BIT(7) | BITS(16, 19)<<1;
+
+	if (to_arm)
+	{
+		DBG("\tr%d <= s%d\n", t, n);
+		LET(t, IBITCAST32(FR32(n)));
+	}
+	else
+	{
+		DBG("\ts%d <= r%d\n", n, t);
+		LETFPS(n, FPBITCAST32(R(t)));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMSR */
+/* cond 1110 1110 reg- Rt-- 1010 0001 0000 */
+/* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */
+#define vfpinstr 	vmsr
+#define vfpinstr_inst 	vmsr_inst
+#define VFPLABEL_INST 	VMSR_INST
+#ifdef VFP_DECODE
+{"vmsr",    2,    ARMVFP2,    20, 27, 0xEE,    0, 11, 0xA10},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmsr",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmsr_inst {
+	unsigned int reg;
+	unsigned int Rd;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->reg  = BITS(inst, 16, 19);
+	inst_cream->Rd   = BITS(inst, 12, 15);
+   
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		/* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled ,
+		   and in privilegied mode */
+		/* Exceptions must be checked, according to v7 ref manual */
+		CHECK_VFP_ENABLED;
+           
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		VMSR(cpu, inst_cream->reg, inst_cream->Rd);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MCR_TRANS
+if (OPC_1 == 0x7 && CRm == 0 && OPC_2 == 0)
+{
+	VMSR(state, CRn, Rt);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MCR_IMPL
+void VMSR(ARMul_State * state, ARMword reg, ARMword Rt)
+{
+	if (reg == 1)
+	{
+		DBG("VMSR :\tfpscr <= r%d=[%x]\n", Rt, state->Reg[Rt]);
+		state->VFP[VFP_OFFSET(VFP_FPSCR)] = state->Reg[Rt];
+	}
+	else if (reg == 8)
+	{
+		DBG("VMSR :\tfpexc <= r%d=[%x]\n", Rt, state->Reg[Rt]);
+		state->VFP[VFP_OFFSET(VFP_FPEXC)] = state->Reg[Rt];
+	}
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	DBG("VMSR :");
+	if(RD == 15) {
+		printf("in %s is not implementation.\n", __FUNCTION__);
+		exit(-1);
+	}
+	
+	Value *data = NULL;
+	int reg = RN;
+	int Rt   = RD;
+	if (reg == 1)
+	{
+		LET(VFP_FPSCR, R(Rt));
+		DBG("\tflags <= fpscr\n");
+	}
+	else
+	{
+		switch (reg)
+		{
+		case 8:
+			LET(VFP_FPEXC, R(Rt));
+			DBG("\tfpexc <= r%d \n", Rt);
+			break;
+		default:
+			DBG("\tSUBARCHITECTURE DEFINED\n");
+			break;
+		}
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRC register to scalar */
+/* cond 1110 0XX0 Vd-- Rt-- 1011 DXX1 0000 */
+/* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */
+#define vfpinstr 	vmovbrc
+#define vfpinstr_inst 	vmovbrc_inst
+#define VFPLABEL_INST 	VMOVBRC_INST
+#ifdef VFP_DECODE
+{"vmovbrc",    4,    ARMVFP2,    23, 27, 0x1C,    20, 20, 0x0,    8,11,0xB,    0,4,0x10},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmovbrc",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovbrc_inst {
+	unsigned int esize;
+	unsigned int index;
+	unsigned int d;
+	unsigned int t;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->d     = BITS(inst, 16, 19)|BIT(inst, 7)<<4;
+	inst_cream->t     = BITS(inst, 12, 15);
+	/* VFP variant of instruction */
+	inst_cream->esize = 32;
+	inst_cream->index = BIT(inst, 21);
+   
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		VFP_DEBUG_UNIMPLEMENTED(VMOVBRC);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MCR_TRANS
+if ((OPC_1 & 0x4) == 0 && CoProc == 11 && CRm == 0)
+{
+	VFP_DEBUG_UNIMPLEMENTED(VMOVBRC);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arch_arm_undef(cpu, bb, instr);
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMRS */
+/* cond 1110 1111 CRn- Rt-- 1010 0001 0000 */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */
+#define vfpinstr 	vmrs
+#define vfpinstr_inst 	vmrs_inst
+#define VFPLABEL_INST 	VMRS_INST
+#ifdef VFP_DECODE
+{"vmrs",        2,      ARMVFP2,        20, 27, 0xEF,     0, 11, 0xa10},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmrs",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmrs_inst {
+	unsigned int reg;
+	unsigned int Rt;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->reg  = BITS(inst, 16, 19);
+	inst_cream->Rt	 = BITS(inst, 12, 15);
+   
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		/* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled,
+		   and in privilegied mode */
+		/* Exceptions must be checked, according to v7 ref manual */
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		DBG("VMRS :");
+	
+		if (inst_cream->reg == 1) /* FPSCR */
+		{
+			if (inst_cream->Rt != 15)
+			{	
+				cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSCR)];
+				DBG("\tr%d <= fpscr[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+			}
+			else
+			{	
+				cpu->NFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 31) & 1;
+				cpu->ZFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 30) & 1;
+				cpu->CFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 29) & 1;
+				cpu->VFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 28) & 1;
+				DBG("\tflags <= fpscr[%1xxxxxxxx]\n", cpu->VFP[VFP_OFFSET(VFP_FPSCR)]>>28);
+			}
+		} 
+		else
+		{
+			switch (inst_cream->reg)
+			{
+			case 0:
+				cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSID)];
+				DBG("\tr%d <= fpsid[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSID)]);
+				break;
+			case 6:
+				/* MVFR1, VFPv3 only ? */
+				DBG("\tr%d <= MVFR1 unimplemented\n", inst_cream->Rt);
+				break;
+			case 7:
+				/* MVFR0, VFPv3 only? */
+				DBG("\tr%d <= MVFR0 unimplemented\n", inst_cream->Rt);
+				break;
+			case 8:
+				cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPEXC)];
+				DBG("\tr%d <= fpexc[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPEXC)]);
+				break;
+			default:
+				DBG("\tSUBARCHITECTURE DEFINED\n");
+				break;
+			}
+		}
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MRC_TRANS
+if (OPC_1 == 0x7 && CRm == 0 && OPC_2 == 0)
+{
+	VMRS(state, CRn, Rt, value);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MRC_IMPL
+void VMRS(ARMul_State * state, ARMword reg, ARMword Rt, ARMword * value)
+{
+	DBG("VMRS :");
+	if (reg == 1)
+	{
+		if (Rt != 15)
+		{
+			*value = state->VFP[VFP_OFFSET(VFP_FPSCR)];
+			DBG("\tr%d <= fpscr[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		}
+		else
+		{
+			*value = state->VFP[VFP_OFFSET(VFP_FPSCR)] ;
+			DBG("\tflags <= fpscr[%1xxxxxxxx]\n", state->VFP[VFP_OFFSET(VFP_FPSCR)]>>28);
+		}
+	}
+	else
+	{
+		switch (reg)
+		{
+		case 0:
+			*value = state->VFP[VFP_OFFSET(VFP_FPSID)];
+			DBG("\tr%d <= fpsid[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPSID)]);
+			break;
+		case 6:
+			/* MVFR1, VFPv3 only ? */
+			DBG("\tr%d <= MVFR1 unimplemented\n", Rt);
+			break;
+		case 7:
+			/* MVFR0, VFPv3 only? */
+			DBG("\tr%d <= MVFR0 unimplemented\n", Rt);
+			break;
+		case 8:
+			*value = state->VFP[VFP_OFFSET(VFP_FPEXC)];
+			DBG("\tr%d <= fpexc[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPEXC)]);
+			break;
+		default:
+			DBG("\tSUBARCHITECTURE DEFINED\n");
+			break;
+		}
+	}
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	DBG("\t\tin %s .\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	
+	Value *data = NULL;
+	int reg = BITS(16, 19);;
+	int Rt   = BITS(12, 15);
+	DBG("VMRS : reg=%d, Rt=%d\n", reg, Rt);
+	if (reg == 1)
+	{
+		if (Rt != 15)
+		{
+			LET(Rt, R(VFP_FPSCR));
+			DBG("\tr%d <= fpscr\n", Rt);
+		}
+		else
+		{
+			//LET(Rt, R(VFP_FPSCR));
+			update_cond_from_fpscr(cpu, instr, bb, pc);
+			DBG("In %s, \tflags <= fpscr\n", __FUNCTION__);
+		}
+	}
+	else
+	{
+		switch (reg)
+		{
+		case 0:
+			LET(Rt, R(VFP_FPSID));
+			DBG("\tr%d <= fpsid\n", Rt);
+			break;
+		case 6:
+			/* MVFR1, VFPv3 only ? */
+			DBG("\tr%d <= MVFR1 unimplemented\n", Rt);
+			break;
+		case 7:
+			/* MVFR0, VFPv3 only? */
+			DBG("\tr%d <= MVFR0 unimplemented\n", Rt);
+			break;
+		case 8:
+			LET(Rt, R(VFP_FPEXC));
+			DBG("\tr%d <= fpexc\n", Rt);
+			break;
+		default:
+			DBG("\tSUBARCHITECTURE DEFINED\n");
+			break;
+		}
+	}
+
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBCR scalar to register */
+/* cond 1110 XXX1 Vd-- Rt-- 1011 NXX1 0000 */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MCR */
+#define vfpinstr 	vmovbcr
+#define vfpinstr_inst 	vmovbcr_inst
+#define VFPLABEL_INST 	VMOVBCR_INST
+#ifdef VFP_DECODE
+{"vmovbcr",    4,    ARMVFP2,    24, 27, 0xE,    20, 20, 1,    8, 11,0xB,    0,4, 0x10},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmovbcr",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovbcr_inst {
+	unsigned int esize;
+	unsigned int index;
+	unsigned int d;
+	unsigned int t;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->d     = BITS(inst, 16, 19)|BIT(inst, 7)<<4;
+	inst_cream->t     = BITS(inst, 12, 15);
+	/* VFP variant of instruction */
+	inst_cream->esize = 32;
+	inst_cream->index = BIT(inst, 21);
+   
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		VFP_DEBUG_UNIMPLEMENTED(VMOVBCR);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MCR_TRANS
+if (CoProc == 11 && CRm == 0)
+{
+	VFP_DEBUG_UNIMPLEMENTED(VMOVBCR);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arch_arm_undef(cpu, bb, instr);
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* MRRC / MCRR instructions */
+/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
+/* cond 1100 0100 Rt2- Rt-- copr opc1 CRm- MCRR */
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRRSS between 2 registers to 2 singles */
+/* cond 1100 010X Rt2- Rt-- 1010 00X1 Vm-- */
+/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
+#define vfpinstr 	vmovbrrss
+#define vfpinstr_inst 	vmovbrrss_inst
+#define VFPLABEL_INST 	VMOVBRRSS_INST
+#ifdef VFP_DECODE
+{"vmovbrrss",    3,    ARMVFP2,    21, 27, 0x62,    8, 11, 0xA,    4, 4, 1},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmovbrrss",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovbrrss_inst {
+	unsigned int to_arm;
+	unsigned int t;
+	unsigned int t2;
+	unsigned int m;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->to_arm     = BIT(inst, 20) == 1;
+	inst_cream->t          = BITS(inst, 12, 15);
+	inst_cream->t2         = BITS(inst, 16, 19);
+	inst_cream->m          = BITS(inst, 0, 3)<<1|BIT(inst, 5);
+   
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MCRR_TRANS
+if (CoProc == 10 && (OPC_1 & 0xD) == 1)
+{
+	VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MRRC_TRANS
+if (CoProc == 10 && (OPC_1 & 0xD) == 1)
+{
+	VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arch_arm_undef(cpu, bb, instr);
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRRD between 2 registers and 1 double */
+/* cond 1100 010X Rt2- Rt-- 1011 00X1 Vm-- */
+/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
+#define vfpinstr 	vmovbrrd
+#define vfpinstr_inst 	vmovbrrd_inst
+#define VFPLABEL_INST 	VMOVBRRD_INST
+#ifdef VFP_DECODE
+{"vmovbrrd",    3,    ARMVFP2,    21, 27, 0x62,    6, 11, 0x2c,    4, 4, 1},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmovbrrd",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovbrrd_inst {
+	unsigned int to_arm;
+	unsigned int t;
+	unsigned int t2;
+	unsigned int m;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->to_arm   = BIT(inst, 20) == 1;
+	inst_cream->t        = BITS(inst, 12, 15);
+	inst_cream->t2       = BITS(inst, 16, 19);
+	inst_cream->m        = BIT(inst, 5)<<4 | BITS(inst, 0, 3);
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		VMOVBRRD(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->t2, inst_cream->m, 
+				&(cpu->Reg[inst_cream->t]), &(cpu->Reg[inst_cream->t2]));
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MCRR_TRANS
+if (CoProc == 11 && (OPC_1 & 0xD) == 1)
+{
+	/* Transfering Rt and Rt2 is not mandatory, as the value of interest is pointed by value1 and value2 */
+	VMOVBRRD(state, BIT(20), Rt, Rt2, BIT(5)<<4|CRm, &value1, &value2);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MRRC_TRANS
+if (CoProc == 11 && (OPC_1 & 0xD) == 1)
+{
+	/* Transfering Rt and Rt2 is not mandatory, as the value of interest is pointed by value1 and value2 */
+	VMOVBRRD(state, BIT(20), Rt, Rt2, BIT(5)<<4|CRm, value1, value2);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MRRC_IMPL
+void VMOVBRRD(ARMul_State * state, ARMword to_arm, ARMword t, ARMword t2, ARMword n, ARMword *value1, ARMword *value2)
+{
+	DBG("VMOV(BRRD) :\n");
+	if (to_arm)
+	{
+		DBG("\tr[%d-%d] <= s[%d-%d]=[%x-%x]\n", t2, t, n*2+1, n*2, state->ExtReg[n*2+1], state->ExtReg[n*2]);
+		*value2 = state->ExtReg[n*2+1];
+		*value1 = state->ExtReg[n*2];
+	}
+	else
+	{
+		DBG("\ts[%d-%d] <= r[%d-%d]=[%x-%x]\n", n*2+1, n*2, t2, t, *value2, *value1);
+		state->ExtReg[n*2+1] = *value2;
+		state->ExtReg[n*2] = *value1;
+	}
+}
+
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int to_arm   = BIT(20) == 1;
+	int t        = BITS(12, 15);
+	int t2       = BITS(16, 19);
+	int n        = BIT(5)<<4 | BITS(0, 3);
+	if(to_arm){
+		LET(t, IBITCAST32(FR32(n * 2)));
+		LET(t2, IBITCAST32(FR32(n * 2 + 1)));
+	}
+	else{
+		LETFPS(n * 2, FPBITCAST32(R(t)));
+		LETFPS(n * 2 + 1, FPBITCAST32(R(t2)));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* LDC/STC between 2 registers and 1 double */
+/* cond 110X XXX1 Rn-- CRd- copr imm- imm- LDC */
+/* cond 110X XXX0 Rn-- CRd- copr imm8 imm8 STC */
+
+/* ----------------------------------------------------------------------- */
+/* VSTR */
+/* cond 1101 UD00 Rn-- Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vstr
+#define vfpinstr_inst 	vstr_inst
+#define VFPLABEL_INST 	VSTR_INST
+#ifdef VFP_DECODE
+{"vstr",        3,      ARMVFP2,        24, 27, 0xd,   20, 21, 0,       9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vstr",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vstr_inst {
+	unsigned int single;
+	unsigned int n;
+	unsigned int d;
+	unsigned int imm32;
+	unsigned int add;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+	
+	inst_cream->single = BIT(inst, 8) == 0;
+	inst_cream->add	   = BIT(inst, 23);
+	inst_cream->imm32  = BITS(inst, 0,7) << 2;
+	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+	inst_cream->n	   = BITS(inst, 16, 19);
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]);
+		addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32);
+		DBG("VSTR :\n");
+		
+		
+		if (inst_cream->single)
+		{
+			fault = check_address_validity(cpu, addr, &phys_addr, 0);
+			if (fault) goto MMU_EXCEPTION;
+			fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d], 32);
+			if (fault) goto MMU_EXCEPTION;
+			DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d, cpu->ExtReg[inst_cream->d]);
+		}
+		else
+		{
+			fault = check_address_validity(cpu, addr, &phys_addr, 0);
+			if (fault) goto MMU_EXCEPTION;
+
+			/* Check endianness */
+			fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d*2], 32);
+			if (fault) goto MMU_EXCEPTION;
+
+			fault = check_address_validity(cpu, addr + 4, &phys_addr, 0);
+			if (fault) goto MMU_EXCEPTION;
+
+			fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[inst_cream->d*2+1], 32);
+			if (fault) goto MMU_EXCEPTION;
+			DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, inst_cream->d*2+1, inst_cream->d*2, cpu->ExtReg[inst_cream->d*2+1], cpu->ExtReg[inst_cream->d*2]);
+		}
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_STC_TRANS
+if (P == 1 && W == 0)
+{
+	return VSTR(state, type, instr, value);
+}
+#endif
+#ifdef VFP_STC_IMPL
+int VSTR(ARMul_State * state, int type, ARMword instr, ARMword * value)
+{
+	static int i = 0;
+	static int single_reg, add, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_reg = BIT(8) == 0;	/* Double precision */
+		add = BIT(23);		/* */
+		imm32 = BITS(0,7)<<2;	/* may not be used */
+		d = single_reg ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		n = BITS(16, 19);	/* destination register */
+		
+		DBG("VSTR :\n");
+		
+		i = 0;
+		regs = 1;
+		
+		return ARMul_DONE;
+	}
+	else if (type == ARMul_DATA)
+	{
+		if (single_reg)
+		{
+			*value = state->ExtReg[d+i];
+			DBG("\taddr[?] <= s%d=[%x]\n", d+i, state->ExtReg[d+i]);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			*value = state->ExtReg[d*2+i];
+			DBG("\taddr[?] <= s[%d]=[%x]\n", d*2+i, state->ExtReg[d*2+i]);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	int single = BIT(8) == 0;
+	int add	   = BIT(23);
+	int imm32  = BITS(0,7) << 2;
+	int d      = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+	int n	   = BITS(16, 19);
+
+	Value* base = (n == 15) ? ADD(AND(R(n), CONST(0xFFFFFFFC)), CONST(8)): R(n);
+	Value* Addr = add ? ADD(base, CONST(imm32)) : SUB(base, CONST(imm32));
+	DBG("VSTR :\n");
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, 4, 0, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, 8, 0, cpu->dyncom_engine->bb_trap);
+	//Value* phys_addr;
+	if(single){
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+		bb = cpu->dyncom_engine->bb;
+		arch_write_memory(cpu, bb, phys_addr, RSPR(d), 32);
+		#endif
+		//memory_write(cpu, bb, Addr, RSPR(d), 32);
+		memory_write(cpu, bb, Addr, IBITCAST32(FR32(d)), 32);
+		bb = cpu->dyncom_engine->bb;
+	}
+	else{
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+		bb = cpu->dyncom_engine->bb;
+		arch_write_memory(cpu, bb, phys_addr, RSPR(d * 2), 32);
+		#endif
+		//memory_write(cpu, bb, Addr, RSPR(d * 2), 32);
+		memory_write(cpu, bb, Addr, IBITCAST32(FR32(d * 2)), 32);
+		bb = cpu->dyncom_engine->bb;
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
+		bb = cpu->dyncom_engine->bb;
+		arch_write_memory(cpu, bb, phys_addr, RSPR(d * 2 + 1), 32);
+		#endif
+		//memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR(d * 2 + 1), 32);
+		memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32(d * 2 + 1)), 32);
+		bb = cpu->dyncom_engine->bb;
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VPUSH */
+/* cond 1101 0D10 1101 Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vpush
+#define vfpinstr_inst 	vpush_inst
+#define VFPLABEL_INST 	VPUSH_INST
+#ifdef VFP_DECODE
+{"vpush",       3,      ARMVFP2,        23, 27, 0x1a,  16, 21, 0x2d,    9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vpush",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vpush_inst {
+	unsigned int single;
+	unsigned int d;
+	unsigned int imm32;
+	unsigned int regs;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->single  = BIT(inst, 8) == 0;
+	inst_cream->d       = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+	inst_cream->imm32   = BITS(inst, 0, 7)<<2;
+	inst_cream->regs    = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+				
+		int i;
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		DBG("VPUSH :\n");
+			
+		addr = cpu->Reg[R13] - inst_cream->imm32;
+
+
+		for (i = 0; i < inst_cream->regs; i++)
+		{
+			if (inst_cream->single)
+			{
+				fault = check_address_validity(cpu, addr, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
+				addr += 4;
+			}
+			else
+			{
+				/* Careful of endianness, little by default */
+				fault = check_address_validity(cpu, addr, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = check_address_validity(cpu, addr + 4, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
+				addr += 8;
+			}
+		}
+		DBG("\tsp[%x]", cpu->Reg[R13]);
+		cpu->Reg[R13] = cpu->Reg[R13] - inst_cream->imm32;
+		DBG("=>[%x]\n", cpu->Reg[R13]);
+	
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vpush_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_STC_TRANS
+if (P == 1 && U == 0 && W == 1 && Rn == 0xD)
+{
+	return VPUSH(state, type, instr, value);
+}
+#endif
+#ifdef VFP_STC_IMPL
+int VPUSH(ARMul_State * state, int type, ARMword instr, ARMword * value)
+{
+	static int i = 0;
+	static int single_regs, add, wback, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_regs = BIT(8) == 0;	/* Single precision */
+		d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		imm32 = BITS(0,7)<<2;	/* may not be used */
+		regs = single_regs ? BITS(0, 7) : BITS(1, 7); /* FSTMX if regs is odd */
+
+		DBG("VPUSH :\n");
+		DBG("\tsp[%x]", state->Reg[R13]);
+		state->Reg[R13] = state->Reg[R13] - imm32;
+		DBG("=>[%x]\n", state->Reg[R13]);
+		
+		i = 0;
+		
+		return ARMul_DONE;
+	} 
+	else if (type == ARMul_DATA)
+	{
+		if (single_regs)
+		{
+			*value = state->ExtReg[d + i];
+			DBG("\taddr[?] <= s%d=[%x]\n", d+i, state->ExtReg[d + i]);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			*value = state->ExtReg[d*2 + i];
+			DBG("\taddr[?] <= s[%d]=[%x]\n", d*2 + i, state->ExtReg[d*2 + i]);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	int single  = BIT(8) == 0;
+	int d       = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+	int imm32   = BITS(0, 7)<<2;
+	int regs    = (single ? BITS(0, 7) : BITS(1, 7));
+
+	DBG("\t\tin %s \n", __FUNCTION__);
+	Value* Addr = SUB(R(13), CONST(imm32));
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap);
+	//Value* phys_addr;
+	int i;
+	for (i = 0; i < regs; i++)
+	{
+		if (single)
+		{
+			//fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR(d + i), 32);
+			#endif
+			//memory_write(cpu, bb, Addr, RSPR(d + i), 32);
+			memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)), 32);
+			bb = cpu->dyncom_engine->bb;
+			Addr = ADD(Addr, CONST(4));
+		}
+		else
+		{
+			/* Careful of endianness, little by default */
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2), 32);
+			#endif
+			//memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32);
+			memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)), 32);
+			bb = cpu->dyncom_engine->bb;
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2 + 1), 32);
+			#endif
+			//memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32);
+			memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32);
+			bb = cpu->dyncom_engine->bb;
+
+			Addr = ADD(Addr, CONST(8));
+		}
+	}
+	LET(13, SUB(R(13), CONST(imm32)));
+
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VSTM */
+/* cond 110P UDW0 Rn-- Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vstm
+#define vfpinstr_inst 	vstm_inst
+#define VFPLABEL_INST 	VSTM_INST
+#ifdef VFP_DECODE
+{"vstm",	3,	ARMVFP2,	25, 27, 0x6,	20, 20, 0,	9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vstm",	0,	ARMVFP2,	0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vstm_inst {
+	unsigned int single;
+	unsigned int add;
+	unsigned int wback;
+	unsigned int d;
+	unsigned int n;
+	unsigned int imm32;
+	unsigned int regs;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->single = BIT(inst, 8) == 0;
+	inst_cream->add    = BIT(inst, 23);
+	inst_cream->wback  = BIT(inst, 21);
+	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+	inst_cream->n      = BITS(inst, 16, 19);
+	inst_cream->imm32  = BITS(inst, 0, 7)<<2;
+	inst_cream->regs   = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST: /* encoding 1 */
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		int i;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		addr = (inst_cream->add ? cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32);
+		DBG("VSTM : addr[%x]\n", addr);
+		
+		
+		for (i = 0; i < inst_cream->regs; i++)
+		{
+			if (inst_cream->single)
+			{
+				fault = check_address_validity(cpu, addr, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
+				addr += 4;
+			}
+			else
+			{
+				/* Careful of endianness, little by default */
+				fault = check_address_validity(cpu, addr, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = check_address_validity(cpu, addr + 4, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
+				addr += 8;
+			}
+		}
+		if (inst_cream->wback){
+			cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 : 
+						   cpu->Reg[inst_cream->n] - inst_cream->imm32);
+			DBG("\twback r%d[%x]\n", inst_cream->n, cpu->Reg[inst_cream->n]);
+		}
+
+	}
+	cpu->Reg[15] += 4;
+	INC_PC(sizeof(vstm_inst));
+
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_STC_TRANS
+/* Should be the last operation of STC */
+return VSTM(state, type, instr, value);
+#endif
+#ifdef VFP_STC_IMPL
+int VSTM(ARMul_State * state, int type, ARMword instr, ARMword * value)
+{
+	static int i = 0;
+	static int single_regs, add, wback, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_regs = BIT(8) == 0;	/* Single precision */
+		add = BIT(23);		/* */
+		wback = BIT(21);	/* write-back */
+		d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		n = BITS(16, 19);	/* destination register */
+		imm32 = BITS(0,7) * 4;	/* may not be used */
+		regs = single_regs ? BITS(0, 7) : BITS(0, 7)>>1; /* FSTMX if regs is odd */
+
+		DBG("VSTM :\n");
+		
+		if (wback) {
+			state->Reg[n] = (add ? state->Reg[n] + imm32 : state->Reg[n] - imm32);
+			DBG("\twback r%d[%x]\n", n, state->Reg[n]);
+		}
+		
+		i = 0;
+		
+		return ARMul_DONE;
+	} 
+	else if (type == ARMul_DATA)
+	{
+		if (single_regs)
+		{
+			*value = state->ExtReg[d + i];
+			DBG("\taddr[?] <= s%d=[%x]\n", d+i, state->ExtReg[d + i]);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			*value = state->ExtReg[d*2 + i];
+			DBG("\taddr[?] <= s[%d]=[%x]\n", d*2 + i, state->ExtReg[d*2 + i]);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//arch_arm_undef(cpu, bb, instr);
+	int single = BIT(8) == 0;
+	int add    = BIT(23);
+	int wback  = BIT(21);
+	int d      = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4);
+	int n      = BITS(16, 19);
+	int imm32  = BITS(0, 7)<<2;
+	int regs   = single ? BITS(0, 7) : BITS(1, 7);
+
+	Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32)));
+	DBG("VSTM \n");
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap);
+
+	int i;	
+	Value* phys_addr;
+	for (i = 0; i < regs; i++)
+	{
+		if (single)
+		{
+			
+			//fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+			/* if R(i) is R15? */
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR(d + i), 32);
+			#endif
+			//memory_write(cpu, bb, Addr, RSPR(d + i), 32);
+			memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)),32);
+			bb = cpu->dyncom_engine->bb;
+			//if (fault) goto MMU_EXCEPTION;
+			//DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
+			Addr = ADD(Addr, CONST(4));
+		}
+		else
+		{
+		
+			//fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2), 32);
+			#endif
+			//memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32);
+			memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)),32);
+			bb = cpu->dyncom_engine->bb;
+			//if (fault) goto MMU_EXCEPTION;
+
+			//fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2 + 1), 32);
+			#endif
+			//memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32);
+			memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32);
+			bb = cpu->dyncom_engine->bb;
+			//if (fault) goto MMU_EXCEPTION;
+			//DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
+			//addr += 8;
+			Addr = ADD(Addr, CONST(8));
+		}
+	}
+	if (wback){
+		//cpu->Reg[n] = (add ? cpu->Reg[n] + imm32 : 
+		//			   cpu->Reg[n] - imm32);
+		LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32))));
+		DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VPOP */
+/* cond 1100 1D11 1101 Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vpop
+#define vfpinstr_inst 	vpop_inst
+#define VFPLABEL_INST 	VPOP_INST
+#ifdef VFP_DECODE
+{"vpop",        3,      ARMVFP2,        23, 27, 0x19,  16, 21, 0x3d,    9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vpop",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vpop_inst {
+	unsigned int single;
+	unsigned int d;
+	unsigned int imm32;
+	unsigned int regs;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->single  = BIT(inst, 8) == 0;
+	inst_cream->d       = (inst_cream->single ? (BITS(inst, 12, 15)<<1)|BIT(inst, 22) : BITS(inst, 12, 15)|(BIT(inst, 22)<<4));
+	inst_cream->imm32   = BITS(inst, 0, 7)<<2;
+	inst_cream->regs    = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		int i;
+		unsigned int value1, value2;
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		DBG("VPOP :\n");
+		
+		addr = cpu->Reg[R13];
+		
+
+		for (i = 0; i < inst_cream->regs; i++)
+		{
+			if (inst_cream->single)
+			{
+				fault = check_address_validity(cpu, addr, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_read_memory(core, addr, phys_addr, value1, 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d+i, value1, addr);
+				cpu->ExtReg[inst_cream->d+i] = value1;
+				addr += 4;
+			}
+			else
+			{
+				/* Careful of endianness, little by default */
+				fault = check_address_validity(cpu, addr, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_read_memory(core, addr, phys_addr, value1, 32);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = check_address_validity(cpu, addr + 4, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_read_memory(core, addr + 4, phys_addr, value2, 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, value2, value1, addr+4, addr);
+				cpu->ExtReg[(inst_cream->d+i)*2] = value1;
+				cpu->ExtReg[(inst_cream->d+i)*2 + 1] = value2;
+				addr += 8;
+			}
+		}
+		DBG("\tsp[%x]", cpu->Reg[R13]);
+		cpu->Reg[R13] = cpu->Reg[R13] + inst_cream->imm32;
+		DBG("=>[%x]\n", cpu->Reg[R13]);
+		
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vpop_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_LDC_TRANS
+if (P == 0 && U == 1 && W == 1 && Rn == 0xD)
+{
+	return VPOP(state, type, instr, value);
+}
+#endif
+#ifdef VFP_LDC_IMPL
+int VPOP(ARMul_State * state, int type, ARMword instr, ARMword value)
+{
+	static int i = 0;
+	static int single_regs, add, wback, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_regs = BIT(8) == 0;	/* Single precision */
+		d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		imm32 = BITS(0,7)<<2;	/* may not be used */
+		regs = single_regs ? BITS(0, 7) : BITS(1, 7); /* FLDMX if regs is odd */
+
+		DBG("VPOP :\n");
+		DBG("\tsp[%x]", state->Reg[R13]);
+		state->Reg[R13] = state->Reg[R13] + imm32;
+		DBG("=>[%x]\n", state->Reg[R13]);
+		
+		i = 0;
+		
+		return ARMul_DONE;
+	}
+	else if (type == ARMul_TRANSFER)
+	{
+		return ARMul_DONE;
+	}
+	else if (type == ARMul_DATA)
+	{
+		if (single_regs)
+		{
+			state->ExtReg[d + i] = value;
+			DBG("\ts%d <= [%x]\n", d + i, value);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			state->ExtReg[d*2 + i] = value;
+			DBG("\ts%d <= [%x]\n", d*2 + i, value);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	/* Should check if PC is destination register */
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction .\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int single  = BIT(8) == 0;
+	int d       = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+	int imm32   = BITS(0, 7)<<2;
+	int regs    = (single ? BITS(0, 7) : BITS(1, 7));
+
+	int i;
+	unsigned int value1, value2;
+
+	DBG("VPOP :\n");
+		
+	Value* Addr = R(13);
+	Value* val;
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+	//Value* phys_addr;	
+	for (i = 0; i < regs; i++)
+	{
+		if (single)
+		{
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, Addr, 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			LETFPS(d + i, FPBITCAST32(val));
+			Addr = ADD(Addr, CONST(4));
+		}
+		else
+		{
+			/* Careful of endianness, little by default */
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, Addr, 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			LETFPS((d + i) * 2, FPBITCAST32(val));
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, ADD(Addr, CONST(4)), 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			LETFPS((d + i) * 2 + 1, FPBITCAST32(val));
+
+			Addr = ADD(Addr, CONST(8));
+		}
+	}
+	LET(13, ADD(R(13), CONST(imm32)));
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VLDR */
+/* cond 1101 UD01 Rn-- Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vldr
+#define vfpinstr_inst 	vldr_inst
+#define VFPLABEL_INST 	VLDR_INST
+#ifdef VFP_DECODE
+{"vldr",        3,      ARMVFP2,        24, 27, 0xd,   20, 21, 0x1,     9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vldr",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vldr_inst {
+	unsigned int single;
+	unsigned int n;
+	unsigned int d;
+	unsigned int imm32;
+	unsigned int add;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+	
+	inst_cream->single = BIT(inst, 8) == 0;
+	inst_cream->add	   = BIT(inst, 23);
+	inst_cream->imm32  = BITS(inst, 0,7) << 2;
+	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+	inst_cream->n	   = BITS(inst, 16, 19);
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]);
+		addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32);
+		DBG("VLDR :\n", addr);
+		
+		
+		if (inst_cream->single)
+		{
+			fault = check_address_validity(cpu, addr, &phys_addr, 1);
+			if (fault) goto MMU_EXCEPTION;
+			fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d], 32);
+			if (fault) goto MMU_EXCEPTION;
+			DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d, cpu->ExtReg[inst_cream->d], addr);
+		}
+		else
+		{
+			unsigned int word1, word2;
+			fault = check_address_validity(cpu, addr, &phys_addr, 1);
+			if (fault) goto MMU_EXCEPTION;
+			fault = interpreter_read_memory(core, addr, phys_addr, word1, 32);
+			if (fault) goto MMU_EXCEPTION;
+
+			fault = check_address_validity(cpu, addr + 4, &phys_addr, 1);
+			if (fault) goto MMU_EXCEPTION;
+			fault = interpreter_read_memory(core, addr + 4, phys_addr, word2, 32);
+			if (fault) goto MMU_EXCEPTION;
+			/* Check endianness */
+			cpu->ExtReg[inst_cream->d*2] = word1;
+			cpu->ExtReg[inst_cream->d*2+1] = word2;
+			DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", inst_cream->d*2+1, inst_cream->d*2, word2, word1, addr+4, addr);
+		}
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vldr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_LDC_TRANS
+if (P == 1 && W == 0)
+{
+	return VLDR(state, type, instr, value);
+}
+#endif
+#ifdef VFP_LDC_IMPL
+int VLDR(ARMul_State * state, int type, ARMword instr, ARMword value)
+{
+	static int i = 0;
+	static int single_reg, add, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_reg = BIT(8) == 0;	/* Double precision */
+		add = BIT(23);		/* */
+		imm32 = BITS(0,7)<<2;	/* may not be used */
+		d = single_reg ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		n = BITS(16, 19);	/* destination register */
+		
+		DBG("VLDR :\n");
+		
+		i = 0;
+		regs = 1;
+		
+		return ARMul_DONE;
+	}
+	else if (type == ARMul_TRANSFER)
+	{
+		return ARMul_DONE;
+	}
+	else if (type == ARMul_DATA)
+	{
+		if (single_reg)
+		{
+			state->ExtReg[d+i] = value;
+			DBG("\ts%d <= [%x]\n", d+i, value);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			state->ExtReg[d*2+i] = value;
+			DBG("\ts[%d] <= [%x]\n", d*2+i, value);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	/* Should check if PC is destination register */
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	int single = BIT(8) == 0;
+	int add    = BIT(23);
+	int wback  = BIT(21);
+	int d      = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+	int n      = BITS(16, 19);
+	int imm32  = BITS(0, 7)<<2;
+	int regs   = (single ? BITS(0, 7) : BITS(1, 7));
+	Value* base = R(n);
+	DBG("\t\tin %s .\n", __FUNCTION__);
+	if(n == 15){
+		base = ADD(AND(base, CONST(0xFFFFFFFC)), CONST(8));
+	}
+	Value* Addr = add ? (ADD(base, CONST(imm32))) : (SUB(base, CONST(imm32)));
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, 4, 1, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, 8, 1, cpu->dyncom_engine->bb_trap);
+	//Value* phys_addr;
+	Value* val;
+	if(single){
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+		bb = cpu->dyncom_engine->bb;
+		val = arch_read_memory(cpu,bb,phys_addr,0,32);
+		#endif
+		memory_read(cpu, bb, Addr, 0, 32);
+		bb = cpu->dyncom_engine->bb;
+		val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+		//LETS(d, val);
+		LETFPS(d,FPBITCAST32(val));
+	}
+	else{
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+		bb = cpu->dyncom_engine->bb;
+		val = arch_read_memory(cpu,bb,phys_addr,0,32);
+		#endif
+		memory_read(cpu, bb, Addr, 0, 32);
+		bb = cpu->dyncom_engine->bb;
+		val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+		//LETS(d * 2, val);
+		LETFPS(d * 2,FPBITCAST32(val));
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
+		bb = cpu->dyncom_engine->bb;
+		val = arch_read_memory(cpu,bb,phys_addr,0,32);
+		#endif
+		memory_read(cpu, bb, ADD(Addr, CONST(4)), 0,32);
+		bb = cpu->dyncom_engine->bb;
+		val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+		//LETS(d * 2 + 1, val);
+		LETFPS( d * 2 + 1,FPBITCAST32(val));
+	}
+
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VLDM */
+/* cond 110P UDW1 Rn-- Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vldm
+#define vfpinstr_inst 	vldm_inst
+#define VFPLABEL_INST 	VLDM_INST
+#ifdef VFP_DECODE
+{"vldm",	3,	ARMVFP2,	25, 27, 0x6,	20, 20, 1,	9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vldm",	0,	ARMVFP2,	0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vldm_inst {
+	unsigned int single;
+	unsigned int add;
+	unsigned int wback;
+	unsigned int d;
+	unsigned int n;
+	unsigned int imm32;
+	unsigned int regs;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->single = BIT(inst, 8) == 0;
+	inst_cream->add    = BIT(inst, 23);
+	inst_cream->wback  = BIT(inst, 21);
+	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+	inst_cream->n      = BITS(inst, 16, 19);
+	inst_cream->imm32  = BITS(inst, 0, 7)<<2;
+	inst_cream->regs   = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		int i;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		addr = (inst_cream->add ? cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32);
+		DBG("VLDM : addr[%x]\n", addr);
+		
+		for (i = 0; i < inst_cream->regs; i++)
+		{
+			if (inst_cream->single)
+			{
+				fault = check_address_validity(cpu, addr, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d+i, cpu->ExtReg[inst_cream->d+i], addr);
+				addr += 4;
+			}
+			else
+			{
+				/* Careful of endianness, little by default */
+				fault = check_address_validity(cpu, addr, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = check_address_validity(cpu, addr + 4, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_read_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2], addr+4, addr);
+				addr += 8;
+			}
+		}
+		if (inst_cream->wback){
+			cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 : 
+						   cpu->Reg[inst_cream->n] - inst_cream->imm32);
+			DBG("\twback r%d[%x]\n", inst_cream->n, cpu->Reg[inst_cream->n]);
+		}
+
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_LDC_TRANS
+/* Should be the last operation of LDC */
+return VLDM(state, type, instr, value);
+#endif
+#ifdef VFP_LDC_IMPL
+int VLDM(ARMul_State * state, int type, ARMword instr, ARMword value)
+{
+	static int i = 0;
+	static int single_regs, add, wback, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_regs = BIT(8) == 0;	/* Single precision */
+		add = BIT(23);		/* */
+		wback = BIT(21);	/* write-back */
+		d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		n = BITS(16, 19);	/* destination register */
+		imm32 = BITS(0,7) * 4;	/* may not be used */
+		regs = single_regs ? BITS(0, 7) : BITS(0, 7)>>1; /* FLDMX if regs is odd */
+
+		DBG("VLDM :\n");
+		
+		if (wback) {
+			state->Reg[n] = (add ? state->Reg[n] + imm32 : state->Reg[n] - imm32);
+			DBG("\twback r%d[%x]\n", n, state->Reg[n]);
+		}
+		
+		i = 0;
+		
+		return ARMul_DONE;
+	} 
+	else if (type == ARMul_DATA)
+	{
+		if (single_regs)
+		{
+			state->ExtReg[d + i] = value;
+			DBG("\ts%d <= [%x] addr[?]\n", d+i, state->ExtReg[d + i]);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			state->ExtReg[d*2 + i] = value;
+			DBG("\ts[%d] <= [%x] addr[?]\n", d*2 + i, state->ExtReg[d*2 + i]);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	int single = BIT(8) == 0;
+	int add    = BIT(23);
+	int wback  = BIT(21);
+	int d      = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|BIT(22)<<4;
+	int n      = BITS(16, 19);
+	int imm32  = BITS(0, 7)<<2;
+	int regs   = single ? BITS(0, 7) : BITS(1, 7);
+
+	Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32)));
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+
+	DBG("VLDM \n");
+	int i;	
+	//Value* phys_addr;
+	Value* val;
+	for (i = 0; i < regs; i++)
+	{
+		if (single)
+		{
+			
+			//fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+			/* if R(i) is R15? */
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, Addr, 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			//LETS(d + i, val);
+			LETFPS(d + i, FPBITCAST32(val));
+			//if (fault) goto MMU_EXCEPTION;
+			//DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
+			Addr = ADD(Addr, CONST(4));
+		}
+		else
+		{
+			#if 0	
+			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, Addr, 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			LETFPS((d + i) * 2, FPBITCAST32(val));
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, Addr, 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			LETFPS((d + i) * 2 + 1, FPBITCAST32(val));
+
+			//fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
+			//DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
+			//addr += 8;
+			Addr = ADD(Addr, CONST(8));
+		}
+	}
+	if (wback){
+		//cpu->Reg[n] = (add ? cpu->Reg[n] + imm32 : 
+		//			   cpu->Reg[n] - imm32);
+		LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32))));
+		DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+#define VFP_DEBUG_TRANSLATE DBG("in func %s, %x\n", __FUNCTION__, inst);
+#define VFP_DEBUG_UNIMPLEMENTED(x) printf("in func %s, " #x " unimplemented\n", __FUNCTION__); exit(-1);
+#define VFP_DEBUG_UNTESTED(x) printf("in func %s, " #x " untested\n", __FUNCTION__);
+
+#define CHECK_VFP_ENABLED	
+	
+#define CHECK_VFP_CDP_RET	vfp_raise_exceptions(cpu, ret, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); //if (ret == -1) {printf("VFP CDP FAILURE %x\n", inst_cream->instr); exit(-1);}
diff --git a/src/core/arm/interpreter/vfp/vfpsingle.cpp b/src/core/arm/interpreter/vfp/vfpsingle.cpp
new file mode 100644
index 000000000..05279f5ce
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfpsingle.cpp
@@ -0,0 +1,1278 @@
+/*
+    vfp/vfpsingle.c - ARM VFPv3 emulation unit - SoftFloat single instruction
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/*
+ * This code is derived in part from :
+ * - Android kernel
+ * - John R. Housers softfloat library, which
+ * carries the following notice:
+ *
+ * ===========================================================================
+ * This C source file is part of the SoftFloat IEC/IEEE Floating-point
+ * Arithmetic Package, Release 2.
+ *
+ * Written by John R. Hauser.  This work was made possible in part by the
+ * International Computer Science Institute, located at Suite 600, 1947 Center
+ * Street, Berkeley, California 94704.  Funding was partially provided by the
+ * National Science Foundation under grant MIP-9311980.  The original version
+ * of this code was written as part of a project to build a fixed-point vector
+ * processor in collaboration with the University of California at Berkeley,
+ * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+ * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+ * arithmetic/softfloat.html'.
+ *
+ * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+ * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+ * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+ * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+ * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+ *
+ * Derivative works are acceptable, even for commercial purposes, so long as
+ * (1) they include prominent notice that the work is derivative, and (2) they
+ * include prominent notice akin to these three paragraphs for those parts of
+ * this code that are retained.
+ * ===========================================================================
+ */
+
+#include "core/arm/interpreter/vfp/vfp_helper.h"
+#include "core/arm/interpreter/vfp/asm_vfp.h"
+#include "core/arm/interpreter/vfp/vfp.h"
+
+static struct vfp_single vfp_single_default_qnan = {
+	//.exponent	= 255,
+	//.sign		= 0,
+	//.significand	= VFP_SINGLE_SIGNIFICAND_QNAN,
+};
+
+static void vfp_single_dump(const char *str, struct vfp_single *s)
+{
+	pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
+		 str, s->sign != 0, s->exponent, s->significand);
+}
+
+static void vfp_single_normalise_denormal(struct vfp_single *vs)
+{
+	int bits = 31 - fls(vs->significand);
+
+	vfp_single_dump("normalise_denormal: in", vs);
+
+	if (bits) {
+		vs->exponent -= bits - 1;
+		vs->significand <<= bits;
+	}
+
+	vfp_single_dump("normalise_denormal: out", vs);
+}
+
+
+u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
+{
+	u32 significand, incr, rmode;
+	int exponent, shift, underflow;
+
+	vfp_single_dump("pack: in", vs);
+
+	/*
+	 * Infinities and NaNs are a special case.
+	 */
+	if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
+		goto pack;
+
+	/*
+	 * Special-case zero.
+	 */
+	if (vs->significand == 0) {
+		vs->exponent = 0;
+		goto pack;
+	}
+
+	exponent = vs->exponent;
+	significand = vs->significand;
+
+	/*
+	 * Normalise first.  Note that we shift the significand up to
+	 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
+	 * significant bit.
+	 */
+	shift = 32 - fls(significand);
+	if (shift < 32 && shift) {
+		exponent -= shift;
+		significand <<= shift;
+	}
+
+#if 1
+	vs->exponent = exponent;
+	vs->significand = significand;
+	vfp_single_dump("pack: normalised", vs);
+#endif
+
+	/*
+	 * Tiny number?
+	 */
+	underflow = exponent < 0;
+	if (underflow) {
+		significand = vfp_shiftright32jamming(significand, -exponent);
+		exponent = 0;
+#if 1
+		vs->exponent = exponent;
+		vs->significand = significand;
+		vfp_single_dump("pack: tiny number", vs);
+#endif
+		if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
+			underflow = 0;
+	}
+
+	/*
+	 * Select rounding increment.
+	 */
+	incr = 0;
+	rmode = fpscr & FPSCR_RMODE_MASK;
+
+	if (rmode == FPSCR_ROUND_NEAREST) {
+		incr = 1 << VFP_SINGLE_LOW_BITS;
+		if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
+			incr -= 1;
+	} else if (rmode == FPSCR_ROUND_TOZERO) {
+		incr = 0;
+	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
+		incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;
+
+	pr_debug("VFP: rounding increment = 0x%08x\n", incr);
+
+	/*
+	 * Is our rounding going to overflow?
+	 */
+	if ((significand + incr) < significand) {
+		exponent += 1;
+		significand = (significand >> 1) | (significand & 1);
+		incr >>= 1;
+#if 1
+		vs->exponent = exponent;
+		vs->significand = significand;
+		vfp_single_dump("pack: overflow", vs);
+#endif
+	}
+
+	/*
+	 * If any of the low bits (which will be shifted out of the
+	 * number) are non-zero, the result is inexact.
+	 */
+	if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
+		exceptions |= FPSCR_IXC;
+
+	/*
+	 * Do our rounding.
+	 */
+	significand += incr;
+
+	/*
+	 * Infinity?
+	 */
+	if (exponent >= 254) {
+		exceptions |= FPSCR_OFC | FPSCR_IXC;
+		if (incr == 0) {
+			vs->exponent = 253;
+			vs->significand = 0x7fffffff;
+		} else {
+			vs->exponent = 255;		/* infinity */
+			vs->significand = 0;
+		}
+	} else {
+		if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
+			exponent = 0;
+		if (exponent || significand > 0x80000000)
+			underflow = 0;
+		if (underflow)
+			exceptions |= FPSCR_UFC;
+		vs->exponent = exponent;
+		vs->significand = significand >> 1;
+	}
+
+ pack:
+	vfp_single_dump("pack: final", vs);
+	{
+		s32 d = vfp_single_pack(vs);
+#if 1
+		pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
+			 sd, d, exceptions);
+#endif
+		vfp_put_float(state, d, sd);
+	}
+
+	return exceptions;
+}
+
+/*
+ * Propagate the NaN, setting exceptions if it is signalling.
+ * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
+ */
+static u32
+vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
+		  struct vfp_single *vsm, u32 fpscr)
+{
+	struct vfp_single *nan;
+	int tn, tm = 0;
+
+	tn = vfp_single_type(vsn);
+
+	if (vsm)
+		tm = vfp_single_type(vsm);
+
+	if (fpscr & FPSCR_DEFAULT_NAN)
+		/*
+		 * Default NaN mode - always returns a quiet NaN
+		 */
+		nan = &vfp_single_default_qnan;
+	else {
+		/*
+		 * Contemporary mode - select the first signalling
+		 * NAN, or if neither are signalling, the first
+		 * quiet NAN.
+		 */
+		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
+			nan = vsn;
+		else
+			nan = vsm;
+		/*
+		 * Make the NaN quiet.
+		 */
+		nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
+	}
+
+	*vsd = *nan;
+
+	/*
+	 * If one was a signalling NAN, raise invalid operation.
+	 */
+	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
+}
+
+
+/*
+ * Extended operations
+ */
+static u32 vfp_single_fabs(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	vfp_put_float(state, vfp_single_packed_abs(m), sd);
+	return 0;
+}
+
+static u32 vfp_single_fcpy(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	vfp_put_float(state, m, sd);
+	return 0;
+}
+
+static u32 vfp_single_fneg(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	vfp_put_float(state, vfp_single_packed_negate(m), sd);
+	return 0;
+}
+
+static const u16 sqrt_oddadjust[] = {
+	0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
+	0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
+};
+
+static const u16 sqrt_evenadjust[] = {
+	0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
+	0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
+};
+
+u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
+{
+	int index;
+	u32 z, a;
+
+	if ((significand & 0xc0000000) != 0x40000000) {
+		pr_debug("VFP: estimate_sqrt: invalid significand\n");
+	}
+
+	a = significand << 1;
+	index = (a >> 27) & 15;
+	if (exponent & 1) {
+		z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
+		z = ((a / z) << 14) + (z << 15);
+		a >>= 1;
+	} else {
+		z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
+		z = a / z + z;
+		z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
+		if (z <= a)
+			return (s32)a >> 1;
+	}
+	{
+		u64 v = (u64)a << 31;
+		do_div(v, z);
+		return v + (z >> 1);
+	}
+}
+
+static u32 vfp_single_fsqrt(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vsm, vsd, *vsp;
+	int ret, tm;
+
+	vfp_single_unpack(&vsm, m);
+	tm = vfp_single_type(&vsm);
+	if (tm & (VFP_NAN|VFP_INFINITY)) {
+		vsp = &vsd;
+
+		if (tm & VFP_NAN)
+			ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
+		else if (vsm.sign == 0) {
+ sqrt_copy:
+			vsp = &vsm;
+			ret = 0;
+		} else {
+ sqrt_invalid:
+			vsp = &vfp_single_default_qnan;
+			ret = FPSCR_IOC;
+		}
+		vfp_put_float(state, vfp_single_pack(vsp), sd);
+		return ret;
+	}
+
+	/*
+	 * sqrt(+/- 0) == +/- 0
+	 */
+	if (tm & VFP_ZERO)
+		goto sqrt_copy;
+
+	/*
+	 * Normalise a denormalised number
+	 */
+	if (tm & VFP_DENORMAL)
+		vfp_single_normalise_denormal(&vsm);
+
+	/*
+	 * sqrt(<0) = invalid
+	 */
+	if (vsm.sign)
+		goto sqrt_invalid;
+
+	vfp_single_dump("sqrt", &vsm);
+
+	/*
+	 * Estimate the square root.
+	 */
+	vsd.sign = 0;
+	vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
+	vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;
+
+	vfp_single_dump("sqrt estimate", &vsd);
+
+	/*
+	 * And now adjust.
+	 */
+	if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
+		if (vsd.significand < 2) {
+			vsd.significand = 0xffffffff;
+		} else {
+			u64 term;
+			s64 rem;
+			vsm.significand <<= !(vsm.exponent & 1);
+			term = (u64)vsd.significand * vsd.significand;
+			rem = ((u64)vsm.significand << 32) - term;
+
+			pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);
+
+			while (rem < 0) {
+				vsd.significand -= 1;
+				rem += ((u64)vsd.significand << 1) | 1;
+			}
+			vsd.significand |= rem != 0;
+		}
+	}
+	vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);
+
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, 0, "fsqrt");
+}
+
+/*
+ * Equal	:= ZC
+ * Less than	:= N
+ * Greater than	:= C
+ * Unordered	:= CV
+ */
+static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u32 fpscr)
+{
+	s32 d;
+	u32 ret = 0;
+
+	d = vfp_get_float(state, sd);
+	if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
+		ret |= FPSCR_C | FPSCR_V;
+		if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
+			/*
+			 * Signalling NaN, or signalling on quiet NaN
+			 */
+			ret |= FPSCR_IOC;
+	}
+
+	if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
+		ret |= FPSCR_C | FPSCR_V;
+		if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
+			/*
+			 * Signalling NaN, or signalling on quiet NaN
+			 */
+			ret |= FPSCR_IOC;
+	}
+
+	if (ret == 0) {
+		if (d == m || vfp_single_packed_abs(d | m) == 0) {
+			/*
+			 * equal
+			 */
+			ret |= FPSCR_Z | FPSCR_C;
+		} else if (vfp_single_packed_sign(d ^ m)) {
+			/*
+			 * different signs
+			 */
+			if (vfp_single_packed_sign(d))
+				/*
+				 * d is negative, so d < m
+				 */
+				ret |= FPSCR_N;
+			else
+				/*
+				 * d is positive, so d > m
+				 */
+				ret |= FPSCR_C;
+		} else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
+			/*
+			 * d < m
+			 */
+			ret |= FPSCR_N;
+		} else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
+			/*
+			 * d > m
+			 */
+			ret |= FPSCR_C;
+		}
+	}
+	return ret;
+}
+
+static u32 vfp_single_fcmp(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_compare(state, sd, 0, m, fpscr);
+}
+
+static u32 vfp_single_fcmpe(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_compare(state, sd, 1, m, fpscr);
+}
+
+static u32 vfp_single_fcmpz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_compare(state, sd, 0, 0, fpscr);
+}
+
+static u32 vfp_single_fcmpez(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_compare(state, sd, 1, 0, fpscr);
+}
+
+static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vsm;
+	struct vfp_double vdd;
+	int tm;
+	u32 exceptions = 0;
+
+	vfp_single_unpack(&vsm, m);
+
+	tm = vfp_single_type(&vsm);
+
+	/*
+	 * If we have a signalling NaN, signal invalid operation.
+	 */
+	if (tm == VFP_SNAN)
+		exceptions = FPSCR_IOC;
+
+	if (tm & VFP_DENORMAL)
+		vfp_single_normalise_denormal(&vsm);
+
+	vdd.sign = vsm.sign;
+	vdd.significand = (u64)vsm.significand << 32;
+
+	/*
+	 * If we have an infinity or NaN, the exponent must be 2047.
+	 */
+	if (tm & (VFP_INFINITY|VFP_NAN)) {
+		vdd.exponent = 2047;
+		if (tm == VFP_QNAN)
+			vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
+		goto pack_nan;
+	} else if (tm & VFP_ZERO)
+		vdd.exponent = 0;
+	else
+		vdd.exponent = vsm.exponent + (1023 - 127);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fcvtd");
+
+ pack_nan:
+	vfp_put_double(state, vfp_double_pack(&vdd), dd);
+	return exceptions;
+}
+
+static u32 vfp_single_fuito(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vs;
+
+	vs.sign = 0;
+	vs.exponent = 127 + 31 - 1;
+	vs.significand = (u32)m;
+
+	return vfp_single_normaliseround(state, sd, &vs, fpscr, 0, "fuito");
+}
+
+static u32 vfp_single_fsito(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vs;
+
+	vs.sign = (m & 0x80000000) >> 16;
+	vs.exponent = 127 + 31 - 1;
+	vs.significand = vs.sign ? -m : m;
+
+	return vfp_single_normaliseround(state, sd, &vs, fpscr, 0, "fsito");
+}
+
+static u32 vfp_single_ftoui(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vsm;
+	u32 d, exceptions = 0;
+	int rmode = fpscr & FPSCR_RMODE_MASK;
+	int tm;
+
+	vfp_single_unpack(&vsm, m);
+	vfp_single_dump("VSM", &vsm);
+
+	/*
+	 * Do we have a denormalised number?
+	 */
+	tm = vfp_single_type(&vsm);
+	if (tm & VFP_DENORMAL)
+		exceptions |= FPSCR_IDC;
+
+	if (tm & VFP_NAN)
+		vsm.sign = 0;
+
+	if (vsm.exponent >= 127 + 32) {
+		d = vsm.sign ? 0 : 0xffffffff;
+		exceptions = FPSCR_IOC;
+	} else if (vsm.exponent >= 127 - 1) {
+		int shift = 127 + 31 - vsm.exponent;
+		u32 rem, incr = 0;
+
+		/*
+		 * 2^0 <= m < 2^32-2^8
+		 */
+		d = (vsm.significand << 1) >> shift;
+		rem = vsm.significand << (33 - shift);
+
+		if (rmode == FPSCR_ROUND_NEAREST) {
+			incr = 0x80000000;
+			if ((d & 1) == 0)
+				incr -= 1;
+		} else if (rmode == FPSCR_ROUND_TOZERO) {
+			incr = 0;
+		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
+			incr = ~0;
+		}
+
+		if ((rem + incr) < rem) {
+			if (d < 0xffffffff)
+				d += 1;
+			else
+				exceptions |= FPSCR_IOC;
+		}
+
+		if (d && vsm.sign) {
+			d = 0;
+			exceptions |= FPSCR_IOC;
+		} else if (rem)
+			exceptions |= FPSCR_IXC;
+	} else {
+		d = 0;
+		if (vsm.exponent | vsm.significand) {
+			exceptions |= FPSCR_IXC;
+			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
+				d = 1;
+			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
+				d = 0;
+				exceptions |= FPSCR_IOC;
+			}
+		}
+	}
+
+	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
+
+	vfp_put_float(state, d, sd);
+
+	return exceptions;
+}
+
+static u32 vfp_single_ftouiz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_single_ftoui(state, sd, unused, m, FPSCR_ROUND_TOZERO);
+}
+
+static u32 vfp_single_ftosi(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vsm;
+	u32 d, exceptions = 0;
+	int rmode = fpscr & FPSCR_RMODE_MASK;
+	int tm;
+
+	vfp_single_unpack(&vsm, m);
+	vfp_single_dump("VSM", &vsm);
+
+	/*
+	 * Do we have a denormalised number?
+	 */
+	tm = vfp_single_type(&vsm);
+	if (vfp_single_type(&vsm) & VFP_DENORMAL)
+		exceptions |= FPSCR_IDC;
+
+	if (tm & VFP_NAN) {
+		d = 0;
+		exceptions |= FPSCR_IOC;
+	} else if (vsm.exponent >= 127 + 32) {
+		/*
+		 * m >= 2^31-2^7: invalid
+		 */
+		d = 0x7fffffff;
+		if (vsm.sign)
+			d = ~d;
+		exceptions |= FPSCR_IOC;
+	} else if (vsm.exponent >= 127 - 1) {
+		int shift = 127 + 31 - vsm.exponent;
+		u32 rem, incr = 0;
+
+		/* 2^0 <= m <= 2^31-2^7 */
+		d = (vsm.significand << 1) >> shift;
+		rem = vsm.significand << (33 - shift);
+
+		if (rmode == FPSCR_ROUND_NEAREST) {
+			incr = 0x80000000;
+			if ((d & 1) == 0)
+				incr -= 1;
+		} else if (rmode == FPSCR_ROUND_TOZERO) {
+			incr = 0;
+		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
+			incr = ~0;
+		}
+
+		if ((rem + incr) < rem && d < 0xffffffff)
+			d += 1;
+		if (d > 0x7fffffff + (vsm.sign != 0)) {
+			d = 0x7fffffff + (vsm.sign != 0);
+			exceptions |= FPSCR_IOC;
+		} else if (rem)
+			exceptions |= FPSCR_IXC;
+
+		if (vsm.sign)
+			d = -d;
+	} else {
+		d = 0;
+		if (vsm.exponent | vsm.significand) {
+			exceptions |= FPSCR_IXC;
+			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
+				d = 1;
+			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
+				d = -1;
+		}
+	}
+
+	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
+
+	vfp_put_float(state, (s32)d, sd);
+
+	return exceptions;
+}
+
+static u32 vfp_single_ftosiz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_single_ftosi(state, sd, unused, m, FPSCR_ROUND_TOZERO);
+}
+
+static struct op fops_ext[] = {
+    { vfp_single_fcpy,   0 },                 //0x00000000 - FEXT_FCPY
+    { vfp_single_fabs,   0 },                 //0x00000001 - FEXT_FABS
+    { vfp_single_fneg,   0 },                 //0x00000002 - FEXT_FNEG
+    { vfp_single_fsqrt,  0 },                 //0x00000003 - FEXT_FSQRT
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_single_fcmp,   OP_SCALAR },         //0x00000008 - FEXT_FCMP
+    { vfp_single_fcmpe,  OP_SCALAR },         //0x00000009 - FEXT_FCMPE
+    { vfp_single_fcmpz,  OP_SCALAR },         //0x0000000A - FEXT_FCMPZ
+    { vfp_single_fcmpez, OP_SCALAR },         //0x0000000B - FEXT_FCMPEZ
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_single_fcvtd,  OP_SCALAR|OP_DD },   //0x0000000F - FEXT_FCVT
+    { vfp_single_fuito,  OP_SCALAR },         //0x00000010 - FEXT_FUITO
+    { vfp_single_fsito,  OP_SCALAR },         //0x00000011 - FEXT_FSITO
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_single_ftoui,  OP_SCALAR },         //0x00000018 - FEXT_FTOUI
+    { vfp_single_ftouiz, OP_SCALAR },         //0x00000019 - FEXT_FTOUIZ
+    { vfp_single_ftosi,  OP_SCALAR },         //0x0000001A - FEXT_FTOSI
+    { vfp_single_ftosiz, OP_SCALAR },         //0x0000001B - FEXT_FTOSIZ
+};
+
+
+
+
+
+static u32
+vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
+			  struct vfp_single *vsm, u32 fpscr)
+{
+	struct vfp_single *vsp;
+	u32 exceptions = 0;
+	int tn, tm;
+
+	tn = vfp_single_type(vsn);
+	tm = vfp_single_type(vsm);
+
+	if (tn & tm & VFP_INFINITY) {
+		/*
+		 * Two infinities.  Are they different signs?
+		 */
+		if (vsn->sign ^ vsm->sign) {
+			/*
+			 * different signs -> invalid
+			 */
+			exceptions = FPSCR_IOC;
+			vsp = &vfp_single_default_qnan;
+		} else {
+			/*
+			 * same signs -> valid
+			 */
+			vsp = vsn;
+		}
+	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
+		/*
+		 * One infinity and one number -> infinity
+		 */
+		vsp = vsn;
+	} else {
+		/*
+		 * 'n' is a NaN of some type
+		 */
+		return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
+	}
+	*vsd = *vsp;
+	return exceptions;
+}
+
+static u32
+vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
+	       struct vfp_single *vsm, u32 fpscr)
+{
+	u32 exp_diff, m_sig;
+
+	if (vsn->significand & 0x80000000 ||
+	    vsm->significand & 0x80000000) {
+		pr_info("VFP: bad FP values\n");
+		vfp_single_dump("VSN", vsn);
+		vfp_single_dump("VSM", vsm);
+	}
+
+	/*
+	 * Ensure that 'n' is the largest magnitude number.  Note that
+	 * if 'n' and 'm' have equal exponents, we do not swap them.
+	 * This ensures that NaN propagation works correctly.
+	 */
+	if (vsn->exponent < vsm->exponent) {
+		struct vfp_single *t = vsn;
+		vsn = vsm;
+		vsm = t;
+	}
+
+	/*
+	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
+	 * infinity or a NaN here.
+	 */
+	if (vsn->exponent == 255)
+		return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
+
+	/*
+	 * We have two proper numbers, where 'vsn' is the larger magnitude.
+	 *
+	 * Copy 'n' to 'd' before doing the arithmetic.
+	 */
+	*vsd = *vsn;
+
+	/*
+	 * Align both numbers.
+	 */
+	exp_diff = vsn->exponent - vsm->exponent;
+	m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
+
+	/*
+	 * If the signs are different, we are really subtracting.
+	 */
+	if (vsn->sign ^ vsm->sign) {
+		m_sig = vsn->significand - m_sig;
+		if ((s32)m_sig < 0) {
+			vsd->sign = vfp_sign_negate(vsd->sign);
+			m_sig = -m_sig;
+		} else if (m_sig == 0) {
+			vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
+				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
+		}
+	} else {
+		m_sig = vsn->significand + m_sig;
+	}
+	vsd->significand = m_sig;
+
+	return 0;
+}
+
+static u32
+vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
+{
+	vfp_single_dump("VSN", vsn);
+	vfp_single_dump("VSM", vsm);
+
+	/*
+	 * Ensure that 'n' is the largest magnitude number.  Note that
+	 * if 'n' and 'm' have equal exponents, we do not swap them.
+	 * This ensures that NaN propagation works correctly.
+	 */
+	if (vsn->exponent < vsm->exponent) {
+		struct vfp_single *t = vsn;
+		vsn = vsm;
+		vsm = t;
+		pr_debug("VFP: swapping M <-> N\n");
+	}
+
+	vsd->sign = vsn->sign ^ vsm->sign;
+
+	/*
+	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
+	 */
+	if (vsn->exponent == 255) {
+		if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
+			return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
+		if ((vsm->exponent | vsm->significand) == 0) {
+			*vsd = vfp_single_default_qnan;
+			return FPSCR_IOC;
+		}
+		vsd->exponent = vsn->exponent;
+		vsd->significand = 0;
+		return 0;
+	}
+
+	/*
+	 * If 'm' is zero, the result is always zero.  In this case,
+	 * 'n' may be zero or a number, but it doesn't matter which.
+	 */
+	if ((vsm->exponent | vsm->significand) == 0) {
+		vsd->exponent = 0;
+		vsd->significand = 0;
+		return 0;
+	}
+
+	/*
+	 * We add 2 to the destination exponent for the same reason as
+	 * the addition case - though this time we have +1 from each
+	 * input operand.
+	 */
+	vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
+	vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
+
+	vfp_single_dump("VSD", vsd);
+	return 0;
+}
+
+#define NEG_MULTIPLY	(1 << 0)
+#define NEG_SUBTRACT	(1 << 1)
+
+static u32
+vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
+{
+	struct vfp_single vsd, vsp, vsn, vsm;
+	u32 exceptions;
+	s32 v;
+
+	v = vfp_get_float(state, sn);
+	pr_debug("VFP: s%u = %08x\n", sn, v);
+	vfp_single_unpack(&vsn, v);
+	if (vsn.exponent == 0 && vsn.significand)
+		vfp_single_normalise_denormal(&vsn);
+
+	vfp_single_unpack(&vsm, m);
+	if (vsm.exponent == 0 && vsm.significand)
+		vfp_single_normalise_denormal(&vsm);
+
+	exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
+	if (negate & NEG_MULTIPLY)
+		vsp.sign = vfp_sign_negate(vsp.sign);
+
+	v = vfp_get_float(state, sd);
+	pr_debug("VFP: s%u = %08x\n", sd, v);
+	vfp_single_unpack(&vsn, v);
+	if (negate & NEG_SUBTRACT)
+		vsn.sign = vfp_sign_negate(vsn.sign);
+
+	exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);
+
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, func);
+}
+
+/*
+ * Standard operations
+ */
+
+/*
+ * sd = sd + (sn * sm)
+ */
+static u32 vfp_single_fmac(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, sd);
+	return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, 0, "fmac");
+}
+
+/*
+ * sd = sd - (sn * sm)
+ */
+static u32 vfp_single_fnmac(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sd, sn);
+	return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
+}
+
+/*
+ * sd = -sd + (sn * sm)
+ */
+static u32 vfp_single_fmsc(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, sd);
+	return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
+}
+
+/*
+ * sd = -sd - (sn * sm)
+ */
+static u32 vfp_single_fnmsc(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, sd);
+	return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
+}
+
+/*
+ * sd = sn * sm
+ */
+static u32 vfp_single_fmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	struct vfp_single vsd, vsn, vsm;
+	u32 exceptions;
+	s32 n = vfp_get_float(state, sn);
+
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, n);
+
+	vfp_single_unpack(&vsn, n);
+	if (vsn.exponent == 0 && vsn.significand)
+		vfp_single_normalise_denormal(&vsn);
+
+	vfp_single_unpack(&vsm, m);
+	if (vsm.exponent == 0 && vsm.significand)
+		vfp_single_normalise_denormal(&vsm);
+
+	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fmul");
+}
+
+/*
+ * sd = -(sn * sm)
+ */
+static u32 vfp_single_fnmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	struct vfp_single vsd, vsn, vsm;
+	u32 exceptions;
+	s32 n = vfp_get_float(state, sn);
+
+	pr_debug("VFP: s%u = %08x\n", sn, n);
+
+	vfp_single_unpack(&vsn, n);
+	if (vsn.exponent == 0 && vsn.significand)
+		vfp_single_normalise_denormal(&vsn);
+
+	vfp_single_unpack(&vsm, m);
+	if (vsm.exponent == 0 && vsm.significand)
+		vfp_single_normalise_denormal(&vsm);
+
+	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
+	vsd.sign = vfp_sign_negate(vsd.sign);
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fnmul");
+}
+
+/*
+ * sd = sn + sm
+ */
+static u32 vfp_single_fadd(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	struct vfp_single vsd, vsn, vsm;
+	u32 exceptions;
+	s32 n = vfp_get_float(state, sn);
+
+	pr_debug("VFP: s%u = %08x\n", sn, n);
+
+	/*
+	 * Unpack and normalise denormals.
+	 */
+	vfp_single_unpack(&vsn, n);
+	if (vsn.exponent == 0 && vsn.significand)
+		vfp_single_normalise_denormal(&vsn);
+
+	vfp_single_unpack(&vsm, m);
+	if (vsm.exponent == 0 && vsm.significand)
+		vfp_single_normalise_denormal(&vsm);
+
+	exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
+
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fadd");
+}
+
+/*
+ * sd = sn - sm
+ */
+static u32 vfp_single_fsub(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, sd);
+	/*
+	 * Subtraction is addition with one sign inverted.
+	 */
+	return vfp_single_fadd(state, sd, sn, vfp_single_packed_negate(m), fpscr);
+}
+
+/*
+ * sd = sn / sm
+ */
+static u32 vfp_single_fdiv(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	struct vfp_single vsd, vsn, vsm;
+	u32 exceptions = 0;
+	s32 n = vfp_get_float(state, sn);
+	int tm, tn;
+
+	pr_debug("VFP: s%u = %08x\n", sn, n);
+
+	vfp_single_unpack(&vsn, n);
+	vfp_single_unpack(&vsm, m);
+
+	vsd.sign = vsn.sign ^ vsm.sign;
+
+	tn = vfp_single_type(&vsn);
+	tm = vfp_single_type(&vsm);
+
+	/*
+	 * Is n a NAN?
+	 */
+	if (tn & VFP_NAN)
+		goto vsn_nan;
+
+	/*
+	 * Is m a NAN?
+	 */
+	if (tm & VFP_NAN)
+		goto vsm_nan;
+
+	/*
+	 * If n and m are infinity, the result is invalid
+	 * If n and m are zero, the result is invalid
+	 */
+	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
+		goto invalid;
+
+	/*
+	 * If n is infinity, the result is infinity
+	 */
+	if (tn & VFP_INFINITY)
+		goto infinity;
+
+	/*
+	 * If m is zero, raise div0 exception
+	 */
+	if (tm & VFP_ZERO)
+		goto divzero;
+
+	/*
+	 * If m is infinity, or n is zero, the result is zero
+	 */
+	if (tm & VFP_INFINITY || tn & VFP_ZERO)
+		goto zero;
+
+	if (tn & VFP_DENORMAL)
+		vfp_single_normalise_denormal(&vsn);
+	if (tm & VFP_DENORMAL)
+		vfp_single_normalise_denormal(&vsm);
+
+	/*
+	 * Ok, we have two numbers, we can perform division.
+	 */
+	vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
+	vsm.significand <<= 1;
+	if (vsm.significand <= (2 * vsn.significand)) {
+		vsn.significand >>= 1;
+		vsd.exponent++;
+	}
+	{
+		u64 significand = (u64)vsn.significand << 32;
+		do_div(significand, vsm.significand);
+		vsd.significand = significand;
+	}
+	if ((vsd.significand & 0x3f) == 0)
+		vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
+
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, 0, "fdiv");
+
+ vsn_nan:
+	exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
+ pack:
+	vfp_put_float(state, vfp_single_pack(&vsd), sd);
+	return exceptions;
+
+ vsm_nan:
+	exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
+	goto pack;
+
+ zero:
+	vsd.exponent = 0;
+	vsd.significand = 0;
+	goto pack;
+
+ divzero:
+	exceptions = FPSCR_DZC;
+ infinity:
+	vsd.exponent = 255;
+	vsd.significand = 0;
+	goto pack;
+
+ invalid:
+	vfp_put_float(state, vfp_single_pack(&vfp_single_default_qnan), sd);
+	return FPSCR_IOC;
+}
+
+static struct op fops[] = {
+	{ vfp_single_fmac,  0 },
+	{ vfp_single_fmsc,  0 },
+	{ vfp_single_fmul,  0 },
+	{ vfp_single_fadd,  0 },
+	{ vfp_single_fnmac, 0 },
+	{ vfp_single_fnmsc, 0 },
+	{ vfp_single_fnmul, 0 },
+	{ vfp_single_fsub,  0 },
+	{ vfp_single_fdiv,  0 },
+};
+
+#define FREG_BANK(x)	((x) & 0x18)
+#define FREG_IDX(x)	((x) & 7)
+
+u32 vfp_single_cpdo(ARMul_State* state, u32 inst, u32 fpscr)
+{
+	u32 op = inst & FOP_MASK;
+	u32 exceptions = 0;
+	unsigned int dest;
+	unsigned int sn = vfp_get_sn(inst);
+	unsigned int sm = vfp_get_sm(inst);
+	unsigned int vecitr, veclen, vecstride;
+	struct op *fop;
+	pr_debug("In %s\n", __FUNCTION__);
+
+	vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
+
+	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
+
+	/*
+	 * fcvtsd takes a dN register number as destination, not sN.
+	 * Technically, if bit 0 of dd is set, this is an invalid
+	 * instruction.  However, we ignore this for efficiency.
+	 * It also only operates on scalars.
+	 */
+	if (fop->flags & OP_DD)
+		dest = vfp_get_dd(inst);
+	else
+		dest = vfp_get_sd(inst);
+
+	/*
+	 * If destination bank is zero, vector length is always '1'.
+	 * ARM DDI0100F C5.1.3, C5.3.2.
+	 */
+	if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
+		veclen = 0;
+	else
+		veclen = fpscr & FPSCR_LENGTH_MASK;
+
+	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
+		 (veclen >> FPSCR_LENGTH_BIT) + 1);
+
+	if (!fop->fn) {
+		printf("VFP: could not find single op %d, inst=0x%x@0x%x\n", FEXT_TO_IDX(inst), inst, state->Reg[15]);
+		exit(-1);
+		goto invalid;
+	}
+
+	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
+		s32 m = vfp_get_float(state, sm);
+		u32 except;
+		char type;
+
+		type = fop->flags & OP_DD ? 'd' : 's';
+		if (op == FOP_EXT)
+			pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
+				 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
+				 sm, m);
+		else
+			pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
+				 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
+				 FOP_TO_IDX(op), sm, m);
+
+		except = fop->fn(state, dest, sn, m, fpscr);
+		pr_debug("VFP: itr%d: exceptions=%08x\n",
+			 vecitr >> FPSCR_LENGTH_BIT, except);
+
+		exceptions |= except;
+
+		/*
+		 * CHECK: It appears to be undefined whether we stop when
+		 * we encounter an exception.  We continue.
+		 */
+		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
+		sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
+		if (FREG_BANK(sm) != 0)
+			sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
+	}
+	return exceptions;
+
+ invalid:
+	return (u32)-1;
+}
diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj
index be8448969..41af5801d 100644
--- a/src/core/core.vcxproj
+++ b/src/core/core.vcxproj
@@ -138,6 +138,8 @@
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="arm\disassembler\arm_disasm.cpp" />
+    <ClCompile Include="arm\disassembler\load_symbol_map.cpp" />
+    <ClCompile Include="arm\interpreter\armcopro.cpp" />
     <ClCompile Include="arm\interpreter\armemu.cpp" />
     <ClCompile Include="arm\interpreter\arminit.cpp" />
     <ClCompile Include="arm\interpreter\armmmu.cpp" />
@@ -145,15 +147,27 @@
     <ClCompile Include="arm\interpreter\armsupp.cpp" />
     <ClCompile Include="arm\interpreter\armvirt.cpp" />
     <ClCompile Include="arm\interpreter\arm_interpreter.cpp" />
+    <ClCompile Include="arm\interpreter\mmu\arm1176jzf_s_mmu.cpp" />
+    <ClCompile Include="arm\interpreter\mmu\cache.cpp" />
+    <ClCompile Include="arm\interpreter\mmu\maverick.cpp" />
+    <ClCompile Include="arm\interpreter\mmu\rb.cpp" />
+    <ClCompile Include="arm\interpreter\mmu\sa_mmu.cpp" />
+    <ClCompile Include="arm\interpreter\mmu\tlb.cpp" />
+    <ClCompile Include="arm\interpreter\mmu\wb.cpp" />
+    <ClCompile Include="arm\interpreter\mmu\xscale_copro.cpp" />
     <ClCompile Include="arm\interpreter\thumbemu.cpp" />
-    <ClCompile Include="arm\mmu\arm1176jzf_s_mmu.cpp" />
+    <ClCompile Include="arm\interpreter\vfp\vfp.cpp" />
+    <ClCompile Include="arm\interpreter\vfp\vfpdouble.cpp" />
+    <ClCompile Include="arm\interpreter\vfp\vfpinstr.cpp" />
+    <ClCompile Include="arm\interpreter\vfp\vfpsingle.cpp" />
     <ClCompile Include="core.cpp" />
     <ClCompile Include="core_timing.cpp" />
     <ClCompile Include="elf\elf_reader.cpp" />
     <ClCompile Include="file_sys\directory_file_system.cpp" />
     <ClCompile Include="file_sys\meta_file_system.cpp" />
+    <ClCompile Include="hle\config_mem.cpp" />
+    <ClCompile Include="hle\coprocessor.cpp" />
     <ClCompile Include="hle\hle.cpp" />
-    <ClCompile Include="hle\mrc.cpp" />
     <ClCompile Include="hle\service\apt.cpp" />
     <ClCompile Include="hle\service\gsp.cpp" />
     <ClCompile Include="hle\service\hid.cpp" />
@@ -171,6 +185,7 @@
   <ItemGroup>
     <ClInclude Include="arm\arm_interface.h" />
     <ClInclude Include="arm\disassembler\arm_disasm.h" />
+    <ClInclude Include="arm\disassembler\load_symbol_map.h" />
     <ClInclude Include="arm\interpreter\armcpu.h" />
     <ClInclude Include="arm\interpreter\armdefs.h" />
     <ClInclude Include="arm\interpreter\armemu.h" />
@@ -178,12 +193,16 @@
     <ClInclude Include="arm\interpreter\armos.h" />
     <ClInclude Include="arm\interpreter\arm_interpreter.h" />
     <ClInclude Include="arm\interpreter\arm_regformat.h" />
+    <ClInclude Include="arm\interpreter\mmu\arm1176jzf_s_mmu.h" />
+    <ClInclude Include="arm\interpreter\mmu\cache.h" />
+    <ClInclude Include="arm\interpreter\mmu\rb.h" />
+    <ClInclude Include="arm\interpreter\mmu\sa_mmu.h" />
+    <ClInclude Include="arm\interpreter\mmu\tlb.h" />
+    <ClInclude Include="arm\interpreter\mmu\wb.h" />
     <ClInclude Include="arm\interpreter\skyeye_defs.h" />
-    <ClInclude Include="arm\mmu\arm1176jzf_s_mmu.h" />
-    <ClInclude Include="arm\mmu\cache.h" />
-    <ClInclude Include="arm\mmu\rb.h" />
-    <ClInclude Include="arm\mmu\tlb.h" />
-    <ClInclude Include="arm\mmu\wb.h" />
+    <ClInclude Include="arm\interpreter\vfp\asm_vfp.h" />
+    <ClInclude Include="arm\interpreter\vfp\vfp.h" />
+    <ClInclude Include="arm\interpreter\vfp\vfp_helper.h" />
     <ClInclude Include="core.h" />
     <ClInclude Include="core_timing.h" />
     <ClInclude Include="elf\elf_reader.h" />
@@ -191,9 +210,10 @@
     <ClInclude Include="file_sys\directory_file_system.h" />
     <ClInclude Include="file_sys\file_sys.h" />
     <ClInclude Include="file_sys\meta_file_system.h" />
+    <ClInclude Include="hle\config_mem.h" />
+    <ClInclude Include="hle\coprocessor.h" />
     <ClInclude Include="hle\function_wrappers.h" />
     <ClInclude Include="hle\hle.h" />
-    <ClInclude Include="hle\mrc.h" />
     <ClInclude Include="hle\service\apt.h" />
     <ClInclude Include="hle\service\gsp.h" />
     <ClInclude Include="hle\service\hid.h" />
diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters
index b5473bc41..edf34ce2f 100644
--- a/src/core/core.vcxproj.filters
+++ b/src/core/core.vcxproj.filters
@@ -7,9 +7,6 @@
     <Filter Include="arm\disassembler">
       <UniqueIdentifier>{61100188-a726-4024-ab16-95ee242b446e}</UniqueIdentifier>
     </Filter>
-    <Filter Include="arm\mmu">
-      <UniqueIdentifier>{a64d3c8a-747a-491b-b782-6e2622bedf24}</UniqueIdentifier>
-    </Filter>
     <Filter Include="file_sys">
       <UniqueIdentifier>{7f618562-73d1-4f55-9628-887497c27654}</UniqueIdentifier>
     </Filter>
@@ -28,6 +25,12 @@
     <Filter Include="hle\service">
       <UniqueIdentifier>{812c5189-ca49-4704-b842-3ffad09092d3}</UniqueIdentifier>
     </Filter>
+    <Filter Include="arm\interpreter\vfp">
+      <UniqueIdentifier>{de62238f-a28e-4a33-8495-23fed6784588}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="arm\interpreter\mmu">
+      <UniqueIdentifier>{13ef9860-2ba0-47e9-a93d-b4052adab269}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="arm\disassembler\arm_disasm.cpp">
@@ -57,9 +60,6 @@
     <ClCompile Include="arm\interpreter\thumbemu.cpp">
       <Filter>arm\interpreter</Filter>
     </ClCompile>
-    <ClCompile Include="arm\mmu\arm1176jzf_s_mmu.cpp">
-      <Filter>arm\mmu</Filter>
-    </ClCompile>
     <ClCompile Include="file_sys\directory_file_system.cpp">
       <Filter>file_sys</Filter>
     </ClCompile>
@@ -105,9 +105,54 @@
     <ClCompile Include="hw\lcd.cpp">
       <Filter>hw</Filter>
     </ClCompile>
-    <ClCompile Include="hle\mrc.cpp">
+    <ClCompile Include="arm\disassembler\load_symbol_map.cpp">
+      <Filter>arm\disassembler</Filter>
+    </ClCompile>
+    <ClCompile Include="hle\coprocessor.cpp">
+      <Filter>hle</Filter>
+    </ClCompile>
+    <ClCompile Include="hle\config_mem.cpp">
       <Filter>hle</Filter>
     </ClCompile>
+    <ClCompile Include="arm\interpreter\vfp\vfp.cpp">
+      <Filter>arm\interpreter\vfp</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\vfp\vfpinstr.cpp">
+      <Filter>arm\interpreter\vfp</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\vfp\vfpdouble.cpp">
+      <Filter>arm\interpreter\vfp</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\vfp\vfpsingle.cpp">
+      <Filter>arm\interpreter\vfp</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\mmu\arm1176jzf_s_mmu.cpp">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\mmu\xscale_copro.cpp">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\mmu\sa_mmu.cpp">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\mmu\cache.cpp">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\mmu\rb.cpp">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\mmu\tlb.cpp">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\mmu\wb.cpp">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\armcopro.cpp">
+      <Filter>arm</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\mmu\maverick.cpp">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="arm\disassembler\arm_disasm.h">
@@ -137,21 +182,6 @@
     <ClInclude Include="arm\interpreter\skyeye_defs.h">
       <Filter>arm\interpreter</Filter>
     </ClInclude>
-    <ClInclude Include="arm\mmu\arm1176jzf_s_mmu.h">
-      <Filter>arm\mmu</Filter>
-    </ClInclude>
-    <ClInclude Include="arm\mmu\cache.h">
-      <Filter>arm\mmu</Filter>
-    </ClInclude>
-    <ClInclude Include="arm\mmu\rb.h">
-      <Filter>arm\mmu</Filter>
-    </ClInclude>
-    <ClInclude Include="arm\mmu\tlb.h">
-      <Filter>arm\mmu</Filter>
-    </ClInclude>
-    <ClInclude Include="arm\mmu\wb.h">
-      <Filter>arm\mmu</Filter>
-    </ClInclude>
     <ClInclude Include="file_sys\directory_file_system.h">
       <Filter>file_sys</Filter>
     </ClInclude>
@@ -208,9 +238,42 @@
     <ClInclude Include="hw\lcd.h">
       <Filter>hw</Filter>
     </ClInclude>
-    <ClInclude Include="hle\mrc.h">
+    <ClInclude Include="arm\disassembler\load_symbol_map.h">
+      <Filter>arm\disassembler</Filter>
+    </ClInclude>
+    <ClInclude Include="hle\coprocessor.h">
+      <Filter>hle</Filter>
+    </ClInclude>
+    <ClInclude Include="hle\config_mem.h">
       <Filter>hle</Filter>
     </ClInclude>
+    <ClInclude Include="arm\interpreter\vfp\asm_vfp.h">
+      <Filter>arm\interpreter\vfp</Filter>
+    </ClInclude>
+    <ClInclude Include="arm\interpreter\vfp\vfp.h">
+      <Filter>arm\interpreter\vfp</Filter>
+    </ClInclude>
+    <ClInclude Include="arm\interpreter\vfp\vfp_helper.h">
+      <Filter>arm\interpreter\vfp</Filter>
+    </ClInclude>
+    <ClInclude Include="arm\interpreter\mmu\arm1176jzf_s_mmu.h">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClInclude>
+    <ClInclude Include="arm\interpreter\mmu\cache.h">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClInclude>
+    <ClInclude Include="arm\interpreter\mmu\rb.h">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClInclude>
+    <ClInclude Include="arm\interpreter\mmu\tlb.h">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClInclude>
+    <ClInclude Include="arm\interpreter\mmu\wb.h">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClInclude>
+    <ClInclude Include="arm\interpreter\mmu\sa_mmu.h">
+      <Filter>arm\interpreter\mmu</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <Text Include="CMakeLists.txt" />
diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp
new file mode 100644
index 000000000..48aa878cc
--- /dev/null
+++ b/src/core/hle/config_mem.cpp
@@ -0,0 +1,70 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#include "common/common_types.h"
+#include "common/log.h"
+
+#include "core/hle/config_mem.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace ConfigMem {
+
+enum {
+    KERNEL_VERSIONREVISION  = 0x1FF80001,
+    KERNEL_VERSIONMINOR     = 0x1FF80002,
+    KERNEL_VERSIONMAJOR     = 0x1FF80003,
+    UPDATEFLAG              = 0x1FF80004,
+    NSTID                   = 0x1FF80008,
+    SYSCOREVER              = 0x1FF80010,
+    UNITINFO                = 0x1FF80014,
+    KERNEL_CTRSDKVERSION    = 0x1FF80018,
+    APPMEMTYPE              = 0x1FF80030,
+    APPMEMALLOC             = 0x1FF80040,
+    FIRM_VERSIONREVISION    = 0x1FF80061,
+    FIRM_VERSIONMINOR       = 0x1FF80062,
+    FIRM_VERSIONMAJOR       = 0x1FF80063,
+    FIRM_SYSCOREVER         = 0x1FF80064,
+    FIRM_CTRSDKVERSION      = 0x1FF80068,
+};
+
+template <typename T>
+inline void Read(T &var, const u32 addr) {
+    switch (addr) {
+
+    // Bit 0 set for Retail
+    case UNITINFO:
+        var = 0x00000001;
+        break;
+
+    // Set app memory size to 64MB?
+    case APPMEMALLOC:
+        var = 0x04000000;
+        break;
+
+    // Unknown - normally set to: 0x08000000 - (APPMEMALLOC + *0x1FF80048)
+    // (Total FCRAM size - APPMEMALLOC - *0x1FF80048)
+    case 0x1FF80044:
+        var = 0x08000000 - (0x04000000 + 0x1400000);
+        break;
+
+    // Unknown - normally set to: 0x1400000 (20MB)
+    case 0x1FF80048:
+        var = 0x1400000;
+        break;
+
+    default:
+        ERROR_LOG(HLE, "unknown ConfigMem::Read%d @ 0x%08X", sizeof(var) * 8, addr);
+    }
+}
+
+// Explicitly instantiate template functions because we aren't defining this in the header:
+
+template void Read<u64>(u64 &var, const u32 addr);
+template void Read<u32>(u32 &var, const u32 addr);
+template void Read<u16>(u16 &var, const u32 addr);
+template void Read<u8>(u8 &var, const u32 addr);
+
+
+} // namespace
diff --git a/src/core/hle/config_mem.h b/src/core/hle/config_mem.h
new file mode 100644
index 000000000..da396a3e6
--- /dev/null
+++ b/src/core/hle/config_mem.h
@@ -0,0 +1,21 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#pragma once
+
+// Configuration memory stores various hardware/kernel configuration settings. This memory page is 
+// read-only for ARM11 processes. I'm guessing this would normally be written to by the firmware/
+// bootrom. Because we're not emulating this, and essentially just "stubbing" the functionality, I'm
+// putting this as a subset of HLE for now.
+
+#include "common/common_types.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace ConfigMem {
+
+template <typename T>
+inline void Read(T &var, const u32 addr);
+
+} // namespace
diff --git a/src/core/hle/coprocessor.cpp b/src/core/hle/coprocessor.cpp
new file mode 100644
index 000000000..39674ee64
--- /dev/null
+++ b/src/core/hle/coprocessor.cpp
@@ -0,0 +1,34 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#include "core/hle/coprocessor.h"
+#include "core/hle/hle.h"
+#include "core/mem_map.h"
+#include "core/core.h"
+
+namespace HLE {
+
+/// Returns the coprocessor (in this case, syscore) command buffer pointer
+Addr GetThreadCommandBuffer() {
+    // Called on insruction: mrc p15, 0, r0, c13, c0, 3
+    return Memory::KERNEL_MEMORY_VADDR;
+}
+
+/// Call an MRC (move to ARM register from coprocessor) instruction in HLE
+s32 CallMRC(u32 instruction) {
+    CoprocessorOperation operation = (CoprocessorOperation)((instruction >> 20) & 0xFF);
+
+    switch (operation) {
+
+    case CALL_GET_THREAD_COMMAND_BUFFER:
+        return GetThreadCommandBuffer();
+
+    default:
+        //DEBUG_LOG(OSHLE, "unknown MRC call 0x%08X", instruction);
+        break;
+    }
+    return -1;
+}
+
+} // namespace
diff --git a/src/core/hle/mrc.h b/src/core/hle/coprocessor.h
index d6b9f162f..b08d6f3ee 100644
--- a/src/core/hle/mrc.h
+++ b/src/core/hle/coprocessor.h
@@ -8,13 +8,13 @@
 
 namespace HLE {
 
-/// MRC operations (ARM register from coprocessor), decoded as instr[20:27]
-enum ARM11_MRC_OPERATION {
+/// Coprocessor operations
+enum CoprocessorOperation {
     DATA_SYNCHRONIZATION_BARRIER    = 0xE0,
     CALL_GET_THREAD_COMMAND_BUFFER  = 0xE1,
 };
 
-/// Call an MRC operation in HLE
-u32 CallMRC(ARM11_MRC_OPERATION operation);
+/// Call an MRC (move to ARM register from coprocessor) instruction in HLE
+s32 CallMRC(u32 instruction);
 
 } // namespace
diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h
index cab772004..d934eafb4 100644
--- a/src/core/hle/function_wrappers.h
+++ b/src/core/hle/function_wrappers.h
@@ -143,8 +143,8 @@ template<int func(u32, u32, u32, u32, u32)> void WrapI_UUUUU() {
     RETURN(retval);
 }
 
-template<int func()> void WrapI_V() {
-    int retval = func();
+template<int func(void*)> void WrapI_V() {
+    u32 retval = func(Memory::GetPointer(PARAM(0)));
     RETURN(retval);
 }
 
@@ -623,6 +623,10 @@ template<u32 func(const char *, const char *)> void WrapU_CC() {
     RETURN(retval);
 }
 
+template<void func(const char*)> void WrapV_C() {
+    func(Memory::GetCharPointer(PARAM(0)));
+}
+
 template<void func(const char *, int)> void WrapV_CI() {
     func(Memory::GetCharPointer(PARAM(0)), PARAM(1));
 }
@@ -701,18 +705,28 @@ template <int func(int, const char *, int)> void WrapI_ICI() {
 }
 
 template<int func(int, void *, void *, void *, void *, u32, int)> void WrapI_IVVVVUI(){
-  u32 retval = func(PARAM(0), Memory::GetPointer(PARAM(1)), Memory::GetPointer(PARAM(2)), Memory::GetPointer(PARAM(3)), Memory::GetPointer(PARAM(4)), PARAM(5), PARAM(6) );
-  RETURN(retval);
+    u32 retval = func(PARAM(0), Memory::GetPointer(PARAM(1)), Memory::GetPointer(PARAM(2)), Memory::GetPointer(PARAM(3)), Memory::GetPointer(PARAM(4)), PARAM(5), PARAM(6) );
+    RETURN(retval);
 }
 
 template<int func(int, const char *, u32, void *, int, int, int)> void WrapI_ICUVIII(){
-  u32 retval = func(PARAM(0), Memory::GetCharPointer(PARAM(1)), PARAM(2), Memory::GetPointer(PARAM(3)), PARAM(4), PARAM(5), PARAM(6));
-  RETURN(retval);
+    u32 retval = func(PARAM(0), Memory::GetCharPointer(PARAM(1)), PARAM(2), Memory::GetPointer(PARAM(3)), PARAM(4), PARAM(5), PARAM(6));
+    RETURN(retval);
 }
 
-template<int func(void *, u32, u32, u32, u32, u32)> void WrapI_VUUUUU(){
-  u32 retval = func(Memory::GetPointer(PARAM(0)), PARAM(1), PARAM(2), PARAM(3), PARAM(4), PARAM(5));
-  RETURN(retval);
+template<int func(void*, u32)> void WrapI_VU(){
+    u32 retval = func(Memory::GetPointer(PARAM(0)), PARAM(1));
+    RETURN(retval);
+}
+
+template<int func(void*, u32, void*, int)> void WrapI_VUVI(){
+    u32 retval = func(Memory::GetPointer(PARAM(0)), PARAM(1), Memory::GetPointer(PARAM(2)), PARAM(3));
+    RETURN(retval);
+}
+
+template<int func(void*, u32, u32, u32, u32, u32)> void WrapI_VUUUUU(){
+    u32 retval = func(Memory::GetPointer(PARAM(0)), PARAM(1), PARAM(2), PARAM(3), PARAM(4), PARAM(5));
+    RETURN(retval);
 }
 
 template<int func(u32, s64)> void WrapI_US64() {
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp
index aae9a3943..be151665b 100644
--- a/src/core/hle/hle.cpp
+++ b/src/core/hle/hle.cpp
@@ -15,49 +15,6 @@ namespace HLE {
 
 static std::vector<ModuleDef> g_module_db;
 
-u8* g_command_buffer = NULL;    ///< Command buffer used for sharing between appcore and syscore
-
-// Read from memory used by CTROS HLE functions
-template <typename T>
-inline void Read(T &var, const u32 addr) {
-    if (addr >= HLE::CMD_BUFFER_ADDR && addr < HLE::CMD_BUFFER_ADDR_END) {
-        var = *((const T*)&g_command_buffer[addr & CMD_BUFFER_MASK]);
-    } else {
-        ERROR_LOG(HLE, "unknown read from address %08X", addr);
-    }
-}
-
-// Write to memory used by CTROS HLE functions
-template <typename T>
-inline void Write(u32 addr, const T data) {
-    if (addr >= HLE::CMD_BUFFER_ADDR && addr < HLE::CMD_BUFFER_ADDR_END) {
-        *(T*)&g_command_buffer[addr & CMD_BUFFER_MASK] = data;
-    } else {
-        ERROR_LOG(HLE, "unknown write to address %08X", addr);
-    }
-}
-
-u8 *GetPointer(const u32 addr) {
-    if (addr >= HLE::CMD_BUFFER_ADDR && addr < HLE::CMD_BUFFER_ADDR_END) {
-        return g_command_buffer + (addr & CMD_BUFFER_MASK);
-    } else {
-        ERROR_LOG(HLE, "unknown pointer from address %08X", addr);
-        return 0;
-    }
-}
-
-// Explicitly instantiate template functions because we aren't defining this in the header:
-
-template void Read<u64>(u64 &var, const u32 addr);
-template void Read<u32>(u32 &var, const u32 addr);
-template void Read<u16>(u16 &var, const u32 addr);
-template void Read<u8>(u8 &var, const u32 addr);
-
-template void Write<u64>(u32 addr, const u64 data);
-template void Write<u32>(u32 addr, const u32 data);
-template void Write<u16>(u32 addr, const u16 data);
-template void Write<u8>(u32 addr, const u8 data);
-
 const FunctionDef* GetSyscallInfo(u32 opcode) {
     u32 func_num = opcode & 0xFFFFFF; // 8 bits
     if (func_num > 0xFF) {
@@ -91,8 +48,6 @@ void RegisterAllModules() {
 
 void Init() {
     Service::Init();
-
-    g_command_buffer = new u8[CMD_BUFFER_SIZE];
     
     RegisterAllModules();
 
@@ -102,8 +57,6 @@ void Init() {
 void Shutdown() {
     Service::Shutdown();
 
-    delete g_command_buffer;
-
     g_module_db.clear();
 
     NOTICE_LOG(HLE, "shutdown OK");
diff --git a/src/core/hle/hle.h b/src/core/hle/hle.h
index 907e2d741..42f37e29c 100644
--- a/src/core/hle/hle.h
+++ b/src/core/hle/hle.h
@@ -17,13 +17,6 @@
 
 namespace HLE {
 
-enum {
-    CMD_BUFFER_ADDR     = 0xA0010000,    ///< Totally arbitrary unused address space
-    CMD_BUFFER_SIZE     = 0x10000,
-    CMD_BUFFER_MASK     = (CMD_BUFFER_SIZE - 1),
-    CMD_BUFFER_ADDR_END = (CMD_BUFFER_ADDR + CMD_BUFFER_SIZE),
-};
-
 typedef u32 Addr;
 typedef void (*Func)();
 
@@ -39,20 +32,6 @@ struct ModuleDef {
     const FunctionDef*  func_table;
 };
 
-// Read from memory used by CTROS HLE functions
-template <typename T>
-inline void Read(T &var, const u32 addr);
-
-// Write to memory used by CTROS HLE functions
-template <typename T>
-inline void Write(u32 addr, const T data);
-
-u8* GetPointer(const u32 Address);
-
-inline const char* GetCharPointer(const u32 address) {
-    return (const char *)GetPointer(address);
-}
-
 void RegisterModule(std::string name, int num_functions, const FunctionDef *func_table);
 
 void CallSyscall(u32 opcode);
diff --git a/src/core/hle/mrc.cpp b/src/core/hle/mrc.cpp
deleted file mode 100644
index 5223be7c9..000000000
--- a/src/core/hle/mrc.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2
-// Refer to the license.txt file included.  
-
-#include "core/hle/mrc.h"
-#include "core/hle/hle.h"
-#include "core/mem_map.h"
-#include "core/core.h"
-
-namespace HLE {
-
-enum {
-    CMD_GX_REQUEST_DMA  = 0x00000000,
-};
-
-/// Data synchronization barrier
-u32 DataSynchronizationBarrier(u32* command_buffer) {
-    u32 command = command_buffer[0];
-
-    switch (command) {
-
-    case CMD_GX_REQUEST_DMA:
-        {
-            u32* src = (u32*)Memory::GetPointer(command_buffer[1]);
-            u32* dst = (u32*)Memory::GetPointer(command_buffer[2]);
-            u32 size = command_buffer[3];
-            memcpy(dst, src, size);
-        }
-        break;
-
-    default:
-        ERROR_LOG(OSHLE, "MRC::DataSynchronizationBarrier unknown command 0x%08X", command);
-        return -1;
-    }
-
-    return 0;
-}
-
-/// Returns the coprocessor (in this case, syscore) command buffer pointer
-Addr GetThreadCommandBuffer() {
-    // Called on insruction: mrc p15, 0, r0, c13, c0, 3
-    // Returns an address in OSHLE memory for the CPU to read/write to
-    RETURN(CMD_BUFFER_ADDR);
-    return CMD_BUFFER_ADDR;
-}
-
-/// Call an MRC operation in HLE
-u32 CallMRC(ARM11_MRC_OPERATION operation) {
-    switch (operation) {
-
-    case DATA_SYNCHRONIZATION_BARRIER:
-        return DataSynchronizationBarrier((u32*)Memory::GetPointer(PARAM(0)));
-
-    case CALL_GET_THREAD_COMMAND_BUFFER:
-        return GetThreadCommandBuffer();
-
-    default:
-        ERROR_LOG(OSHLE, "unimplemented MRC operation 0x%02X", operation);
-        break;
-    }
-    return -1;
-}
-
-} // namespace
diff --git a/src/core/hle/service/apt.cpp b/src/core/hle/service/apt.cpp
index 4a1e8c992..709ac5493 100644
--- a/src/core/hle/service/apt.cpp
+++ b/src/core/hle/service/apt.cpp
@@ -18,7 +18,7 @@ void Initialize(Service::Interface* self) {
 }
 
 void GetLockHandle(Service::Interface* self) {
-    u32* cmd_buff = (u32*)HLE::GetPointer(HLE::CMD_BUFFER_ADDR + Service::kCommandHeaderOffset);
+    u32* cmd_buff = Service::GetCommandBuffer();
     cmd_buff[5] = 0x00000000; // TODO: This should be an actual mutex handle
 }
 
diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp
index 88c1f1a0f..12c7dabcd 100644
--- a/src/core/hle/service/gsp.cpp
+++ b/src/core/hle/service/gsp.cpp
@@ -4,6 +4,7 @@
 
 
 #include "common/log.h"
+#include "common/bit_field.h"
 
 #include "core/mem_map.h"
 #include "core/hle/hle.h"
@@ -12,10 +13,51 @@
 #include "core/hw/lcd.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// GSP shared memory GX command buffer header
+union GX_CmdBufferHeader {
+    u32 hex;
+
+    // Current command index. This index is updated by GSP module after loading the command data, 
+    // right before the command is processed. When this index is updated by GSP module, the total 
+    // commands field is decreased by one as well.
+    BitField<0,8,u32>   index;
+    
+    // Total commands to process, must not be value 0 when GSP module handles commands. This must be
+    // <=15 when writing a command to shared memory. This is incremented by the application when 
+    // writing a command to shared memory, after increasing this value TriggerCmdReqQueue is only 
+    // used if this field is value 1.
+    BitField<8,8,u32>  number_commands;
+
+};
+
+/// Gets the address of the start (header) of a command buffer in GSP shared memory
+static inline u32 GX_GetCmdBufferAddress(u32 thread_id) {
+    return (0x10002000 + 0x800 + (thread_id * 0x200));
+}
+
+/// Gets a pointer to the start (header) of a command buffer in GSP shared memory
+static inline u8* GX_GetCmdBufferPointer(u32 thread_id, u32 offset=0) {
+    return Memory::GetPointer(GX_GetCmdBufferAddress(thread_id) + offset);
+}
+
+/// Finishes execution of a GSP command
+void GX_FinishCommand(u32 thread_id) {
+    GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(thread_id);
+    header->number_commands = header->number_commands - 1;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
 // Namespace GSP_GPU
 
 namespace GSP_GPU {
 
+u32 g_thread_id = 0;
+
+enum {
+    CMD_GX_REQUEST_DMA  = 0x00000000,
+};
+
 enum {
     REG_FRAMEBUFFER_1   = 0x00400468,
     REG_FRAMEBUFFER_2   = 0x00400494,
@@ -26,7 +68,7 @@ void ReadHWRegs(Service::Interface* self) {
     static const u32 framebuffer_1[] = {LCD::PADDR_VRAM_TOP_LEFT_FRAME1, LCD::PADDR_VRAM_TOP_RIGHT_FRAME1};
     static const u32 framebuffer_2[] = {LCD::PADDR_VRAM_TOP_LEFT_FRAME2, LCD::PADDR_VRAM_TOP_RIGHT_FRAME2};
 
-    u32* cmd_buff = (u32*)HLE::GetPointer(HLE::CMD_BUFFER_ADDR + Service::kCommandHeaderOffset);
+    u32* cmd_buff = Service::GetCommandBuffer();
     u32 reg_addr = cmd_buff[1];
     u32 size = cmd_buff[2];
     u32* dst = (u32*)Memory::GetPointer(cmd_buff[0x41]);
@@ -50,18 +92,37 @@ void ReadHWRegs(Service::Interface* self) {
         break;
 
     default:
-        ERROR_LOG(OSHLE, "GSP_GPU::ReadHWRegs unknown register read at address %08X", reg_addr);
+        ERROR_LOG(GSP, "ReadHWRegs unknown register read at address %08X", reg_addr);
     }
 
 }
 
 void RegisterInterruptRelayQueue(Service::Interface* self) {
-    u32* cmd_buff = (u32*)HLE::GetPointer(HLE::CMD_BUFFER_ADDR + Service::kCommandHeaderOffset);
+    u32* cmd_buff = Service::GetCommandBuffer();
     u32 flags = cmd_buff[1];
     u32 event_handle = cmd_buff[3]; // TODO(bunnei): Implement event handling
+    
+    cmd_buff[2] = g_thread_id;          // ThreadID
     cmd_buff[4] = self->NewHandle();
+}
+
+/// This triggers handling of the GX command written to the command buffer in shared memory.
+void TriggerCmdReqQueue(Service::Interface* self) {
+    GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(g_thread_id);
+    u32* cmd_buff = (u32*)GX_GetCmdBufferPointer(g_thread_id, 0x20 + (header->index * 0x20));
 
-    return;
+    switch (cmd_buff[0]) {
+
+    // GX request DMA - typically used for copying memory from GSP heap to VRAM
+    case CMD_GX_REQUEST_DMA:
+        memcpy(Memory::GetPointer(cmd_buff[2]), Memory::GetPointer(cmd_buff[1]), cmd_buff[3]);
+        break;
+
+    default:
+        ERROR_LOG(GSP, "TriggerCmdReqQueue unknown command 0x%08X", cmd_buff[0]);
+    }
+    
+    GX_FinishCommand(g_thread_id);
 }
 
 const Interface::FunctionInfo FunctionTable[] = {
@@ -76,7 +137,7 @@ const Interface::FunctionInfo FunctionTable[] = {
     {0x00090082, NULL,                          "InvalidateDataCache"},
     {0x000A0044, NULL,                          "RegisterInterruptEvents"},
     {0x000B0040, NULL,                          "SetLcdForceBlack"},
-    {0x000C0000, NULL,                          "TriggerCmdReqQueue"},
+    {0x000C0000, TriggerCmdReqQueue,            "TriggerCmdReqQueue"},
     {0x000D0140, NULL,                          "SetDisplayTransfer"},
     {0x000E0180, NULL,                          "SetTextureCopy"},
     {0x000F0200, NULL,                          "SetMemoryFill"},
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index b79dc9458..b260a290a 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -10,6 +10,7 @@
 
 #include "common/common.h"
 #include "common/common_types.h"
+#include "core/mem_map.h"
 #include "core/hle/syscall.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -22,6 +23,15 @@ typedef s32 NativeUID;                          ///< Native handle for a service
 static const int kMaxPortSize           = 0x08; ///< Maximum size of a port name (8 characters)
 static const int kCommandHeaderOffset   = 0x80; ///< Offset into command buffer of header
 
+/**
+ * Returns a pointer to the command buffer in kernel memory
+ * @param offset Optional offset into command buffer
+ * @return Pointer to command buffer
+ */
+inline static u32* GetCommandBuffer(const int offset=0) {
+    return (u32*)Memory::GetPointer(Memory::KERNEL_MEMORY_VADDR + kCommandHeaderOffset + offset);
+}
+
 class Manager;
 
 /// Interface to a CTROS service
@@ -81,7 +91,7 @@ public:
      * @return Return result of svcSendSyncRequest passed back to user app
      */
     Syscall::Result Sync() {
-        u32* cmd_buff = (u32*)HLE::GetPointer(HLE::CMD_BUFFER_ADDR + kCommandHeaderOffset);
+        u32* cmd_buff = GetCommandBuffer();
         auto itr = m_functions.find(cmd_buff[0]);
 
         if (itr == m_functions.end()) {
diff --git a/src/core/hle/service/srv.cpp b/src/core/hle/service/srv.cpp
index 9437868c5..071741444 100644
--- a/src/core/hle/service/srv.cpp
+++ b/src/core/hle/service/srv.cpp
@@ -18,7 +18,7 @@ void Initialize(Service::Interface* self) {
 
 void GetServiceHandle(Service::Interface* self) {
     Syscall::Result res = 0;
-    u32* cmd_buff = (u32*)HLE::GetPointer(HLE::CMD_BUFFER_ADDR + Service::kCommandHeaderOffset);
+    u32* cmd_buff = Service::GetCommandBuffer();
 
     std::string port_name = std::string((const char*)&cmd_buff[1], 0, Service::kMaxPortSize);
     Service::Interface* service = Service::g_manager->FetchFromPortName(port_name);
diff --git a/src/core/hle/syscall.cpp b/src/core/hle/syscall.cpp
index df6412743..d47df6038 100644
--- a/src/core/hle/syscall.cpp
+++ b/src/core/hle/syscall.cpp
@@ -29,6 +29,9 @@ enum MapMemoryPermission {
 Result ControlMemory(u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissions) {
     u32 virtual_address = 0x00000000;
 
+    DEBUG_LOG(SVC, "ControlMemory called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=%08X, permissions=0x%08X", 
+        operation, addr0, addr1, size, permissions);
+
     switch (operation) {
 
     // Map normal heap memory
@@ -43,16 +46,18 @@ Result ControlMemory(u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissi
 
     // Unknown ControlMemory operation
     default:
-        ERROR_LOG(OSHLE, "Unknown ControlMemory operation %08X", operation);
+        ERROR_LOG(SVC, "ControlMemory unknown operation=0x%08X", operation);
     }
 
     Core::g_app_core->SetReg(1, virtual_address);
+
     return 0;
 }
 
 /// Maps a memory block to specified address
 Result MapMemoryBlock(Handle memblock, u32 addr, u32 mypermissions, u32 otherpermission) {
-    int x = 0;
+    DEBUG_LOG(SVC, "MapMemoryBlock called memblock=0x08X, addr=0x%08X, mypermissions=0x%08X, otherpermission=%d", 
+        memblock, addr, mypermissions, otherpermission);
     switch (mypermissions) {
     case MEMORY_PERMISSION_NORMAL:
     case MEMORY_PERMISSION_NORMAL + 1:
@@ -60,20 +65,23 @@ Result MapMemoryBlock(Handle memblock, u32 addr, u32 mypermissions, u32 otherper
         Memory::MapBlock_Shared(memblock, addr, mypermissions);
         break;
     default:
-        ERROR_LOG(OSHLE, "Unknown MapMemoryBlock permissions %08X", mypermissions);
+        ERROR_LOG(OSHLE, "MapMemoryBlock unknown permissions=0x%08X", mypermissions);
     }
     return 0;
 }
 
 /// Connect to an OS service given the port name, returns the handle to the port to out
 Result ConnectToPort(void* out, const char* port_name) {
+    
     Service::Interface* service = Service::g_manager->FetchFromPortName(port_name);
     Core::g_app_core->SetReg(1, service->GetUID());
+    DEBUG_LOG(SVC, "ConnectToPort called port_name=%s", port_name);
     return 0;
 }
 
 /// Synchronize to an OS service
 Result SendSyncRequest(Handle session) {
+    DEBUG_LOG(SVC, "SendSyncRequest called session=0x%08X");
     Service::Interface* service = Service::g_manager->FetchFromUID(session);
     service->Sync();
     return 0;
@@ -82,142 +90,177 @@ Result SendSyncRequest(Handle session) {
 /// Close a handle
 Result CloseHandle(Handle handle) {
     // ImplementMe
+    DEBUG_LOG(SVC, "(UNIMPLEMENTED) CloseHandle called handle=0x%08X", handle);
     return 0;
 }
 
 /// Wait for a handle to synchronize, timeout after the specified nanoseconds
 Result WaitSynchronization1(Handle handle, s64 nanoseconds) {
     // ImplementMe
+    DEBUG_LOG(SVC, "(UNIMPLEMENTED) WaitSynchronization1 called handle=0x%08X, nanoseconds=%d", 
+        handle, nanoseconds);
+    return 0;
+}
+
+/// Create an address arbiter (to allocate access to shared resources)
+Result CreateAddressArbiter(void* arbiter) {
+    // ImplementMe
+    DEBUG_LOG(SVC, "(UNIMPLEMENTED) CreateAddressArbiter called");
+    Core::g_app_core->SetReg(1, 0xDEADBEEF);
+    return 0;
+}
+
+/// Used to output a message on a debug hardware unit - does nothing on a retail unit
+void OutputDebugString(const char* string) {
+    NOTICE_LOG(SVC, "## OSDEBUG: %08X %s", Core::g_app_core->GetPC(), string);
+}
+
+/// Get resource limit
+Result GetResourceLimit(void* resource_limit, Handle process) {
+    // With regards to proceess values:
+    // 0xFFFF8001 is a handle alias for the current KProcess, and 0xFFFF8000 is a handle alias for 
+    // the current KThread.
+    DEBUG_LOG(SVC, "(UNIMPLEMENTED) GetResourceLimit called process=0x%08X", process);
+    Core::g_app_core->SetReg(1, 0xDEADBEEF);
+    return 0;
+}
+
+/// Get resource limit current values
+Result GetResourceLimitCurrentValues(void* _values, Handle resource_limit, void* names, s32 name_count) {
+    //s64* values = (s64*)_values;
+    DEBUG_LOG(SVC, "(UNIMPLEMENTED) GetResourceLimitCurrentValues called resource_limit=%08X, names=%s, name_count=%d",
+        resource_limit, names, name_count);
+    Memory::Write32(Core::g_app_core->GetReg(0), 0); // Normmatt: Set used memory to 0 for now
     return 0;
 }
 
 const HLE::FunctionDef Syscall_Table[] = {
-    {0x00,  NULL,                               "Unknown"},
-    {0x01,  WrapI_UUUUU<ControlMemory>,         "ControlMemory"},
-    {0x02,  NULL,                               "QueryMemory"},
-    {0x03,  NULL,                               "ExitProcess"},
-    {0x04,  NULL,                               "GetProcessAffinityMask"},
-    {0x05,  NULL,                               "SetProcessAffinityMask"},
-    {0x06,  NULL,                               "GetProcessIdealProcessor"},
-    {0x07,  NULL,                               "SetProcessIdealProcessor"},
-    {0x08,  NULL,                               "CreateThread"},
-    {0x09,  NULL,                               "ExitThread"},
-    {0x0A,  NULL,                               "SleepThread"},
-    {0x0B,  NULL,                               "GetThreadPriority"},
-    {0x0C,  NULL,                               "SetThreadPriority"},
-    {0x0D,  NULL,                               "GetThreadAffinityMask"},
-    {0x0E,  NULL,                               "SetThreadAffinityMask"},
-    {0x0F,  NULL,                               "GetThreadIdealProcessor"},
-    {0x10,  NULL,                               "SetThreadIdealProcessor"},
-    {0x11,  NULL,                               "GetCurrentProcessorNumber"},
-    {0x12,  NULL,                               "Run"},
-    {0x13,  NULL,                               "CreateMutex"},
-    {0x14,  NULL,                               "ReleaseMutex"},
-    {0x15,  NULL,                               "CreateSemaphore"},
-    {0x16,  NULL,                               "ReleaseSemaphore"},
-    {0x17,  NULL,                               "CreateEvent"},
-    {0x18,  NULL,                               "SignalEvent"},
-    {0x19,  NULL,                               "ClearEvent"},
-    {0x1A,  NULL,                               "CreateTimer"},
-    {0x1B,  NULL,                               "SetTimer"},
-    {0x1C,  NULL,                               "CancelTimer"},
-    {0x1D,  NULL,                               "ClearTimer"},
-    {0x1E,  NULL,                               "CreateMemoryBlock"},
-    {0x1F,  WrapI_UUUU<MapMemoryBlock>,         "MapMemoryBlock"},
-    {0x20,  NULL,                               "UnmapMemoryBlock"},
-    {0x21,  NULL,                               "CreateAddressArbiter"},
-    {0x22,  NULL,                               "ArbitrateAddress"},
-    {0x23,  WrapI_U<CloseHandle>,               "CloseHandle"},
-    {0x24,  WrapI_US64<WaitSynchronization1>,   "WaitSynchronization1"},
-    {0x25,  NULL,                               "WaitSynchronizationN"},
-    {0x26,  NULL,                               "SignalAndWait"},
-    {0x27,  NULL,                               "DuplicateHandle"},
-    {0x28,  NULL,                               "GetSystemTick"},
-    {0x29,  NULL,                               "GetHandleInfo"},
-    {0x2A,  NULL,                               "GetSystemInfo"},
-    {0x2B,  NULL,                               "GetProcessInfo"},
-    {0x2C,  NULL,                               "GetThreadInfo"},
-    {0x2D,  WrapI_VC<ConnectToPort>,            "ConnectToPort"},
-    {0x2E,  NULL,                               "SendSyncRequest1"},
-    {0x2F,  NULL,                               "SendSyncRequest2"},
-    {0x30,  NULL,                               "SendSyncRequest3"},
-    {0x31,  NULL,                               "SendSyncRequest4"},
-    {0x32,  WrapI_U<SendSyncRequest>,           "SendSyncRequest"},
-    {0x33,  NULL,                               "OpenProcess"},
-    {0x34,  NULL,                               "OpenThread"},
-    {0x35,  NULL,                               "GetProcessId"},
-    {0x36,  NULL,                               "GetProcessIdOfThread"},
-    {0x37,  NULL,                               "GetThreadId"},
-    {0x38,  NULL,                               "GetResourceLimit"},
-    {0x39,  NULL,                               "GetResourceLimitLimitValues"},
-    {0x3A,  NULL,                               "GetResourceLimitCurrentValues"},
-    {0x3B,  NULL,                               "GetThreadContext"},
-    {0x3C,  NULL,                               "Break"},
-    {0x3D,  NULL,                               "OutputDebugString"},
-    {0x3E,  NULL,                               "ControlPerformanceCounter"},
-    {0x3F,  NULL,                               "Unknown"},
-    {0x40,  NULL,                               "Unknown"},
-    {0x41,  NULL,                               "Unknown"},
-    {0x42,  NULL,                               "Unknown"},
-    {0x43,  NULL,                               "Unknown"},
-    {0x44,  NULL,                               "Unknown"},
-    {0x45,  NULL,                               "Unknown"},
-    {0x46,  NULL,                               "Unknown"},
-    {0x47,  NULL,                               "CreatePort"},
-    {0x48,  NULL,                               "CreateSessionToPort"},
-    {0x49,  NULL,                               "CreateSession"},
-    {0x4A,  NULL,                               "AcceptSession"},
-    {0x4B,  NULL,                               "ReplyAndReceive1"},
-    {0x4C,  NULL,                               "ReplyAndReceive2"},
-    {0x4D,  NULL,                               "ReplyAndReceive3"},
-    {0x4E,  NULL,                               "ReplyAndReceive4"},
-    {0x4F,  NULL,                               "ReplyAndReceive"},
-    {0x50,  NULL,                               "BindInterrupt"},
-    {0x51,  NULL,                               "UnbindInterrupt"},
-    {0x52,  NULL,                               "InvalidateProcessDataCache"},
-    {0x53,  NULL,                               "StoreProcessDataCache"},
-    {0x54,  NULL,                               "FlushProcessDataCache"},
-    {0x55,  NULL,                               "StartInterProcessDma"},
-    {0x56,  NULL,                               "StopDma"},
-    {0x57,  NULL,                               "GetDmaState"},
-    {0x58,  NULL,                               "RestartDma"},
-    {0x59,  NULL,                               "Unknown"},
-    {0x5A,  NULL,                               "Unknown"},
-    {0x5B,  NULL,                               "Unknown"},
-    {0x5C,  NULL,                               "Unknown"},
-    {0x5D,  NULL,                               "Unknown"},
-    {0x5E,  NULL,                               "Unknown"},
-    {0x5F,  NULL,                               "Unknown"},
-    {0x60,  NULL,                               "DebugActiveProcess"},
-    {0x61,  NULL,                               "BreakDebugProcess"},
-    {0x62,  NULL,                               "TerminateDebugProcess"},
-    {0x63,  NULL,                               "GetProcessDebugEvent"},
-    {0x64,  NULL,                               "ContinueDebugEvent"},
-    {0x65,  NULL,                               "GetProcessList"},
-    {0x66,  NULL,                               "GetThreadList"},
-    {0x67,  NULL,                               "GetDebugThreadContext"},
-    {0x68,  NULL,                               "SetDebugThreadContext"},
-    {0x69,  NULL,                               "QueryDebugProcessMemory"},
-    {0x6A,  NULL,                               "ReadProcessMemory"},
-    {0x6B,  NULL,                               "WriteProcessMemory"},
-    {0x6C,  NULL,                               "SetHardwareBreakPoint"},
-    {0x6D,  NULL,                               "GetDebugThreadParam"},
-    {0x6E,  NULL,                               "Unknown"},
-    {0x6F,  NULL,                               "Unknown"},
-    {0x70,  NULL,                               "ControlProcessMemory"},
-    {0x71,  NULL,                               "MapProcessMemory"},
-    {0x72,  NULL,                               "UnmapProcessMemory"},
-    {0x73,  NULL,                               "Unknown"},
-    {0x74,  NULL,                               "Unknown"},
-    {0x75,  NULL,                               "Unknown"},
-    {0x76,  NULL,                               "TerminateProcess"},
-    {0x77,  NULL,                               "Unknown"},
-    {0x78,  NULL,                               "CreateResourceLimit"},
-    {0x79,  NULL,                               "Unknown"},
-    {0x7A,  NULL,                               "Unknown"},
-    {0x7B,  NULL,                               "Unknown"},
-    {0x7C,  NULL,                               "KernelSetState"},
-    {0x7D,  NULL,                               "QueryProcessMemory"},
+    {0x00,  NULL,                                       "Unknown"},
+    {0x01,  WrapI_UUUUU<ControlMemory>,                 "ControlMemory"},
+    {0x02,  NULL,                                       "QueryMemory"},
+    {0x03,  NULL,                                       "ExitProcess"},
+    {0x04,  NULL,                                       "GetProcessAffinityMask"},
+    {0x05,  NULL,                                       "SetProcessAffinityMask"},
+    {0x06,  NULL,                                       "GetProcessIdealProcessor"},
+    {0x07,  NULL,                                       "SetProcessIdealProcessor"},
+    {0x08,  NULL,                                       "CreateThread"},
+    {0x09,  NULL,                                       "ExitThread"},
+    {0x0A,  NULL,                                       "SleepThread"},
+    {0x0B,  NULL,                                       "GetThreadPriority"},
+    {0x0C,  NULL,                                       "SetThreadPriority"},
+    {0x0D,  NULL,                                       "GetThreadAffinityMask"},
+    {0x0E,  NULL,                                       "SetThreadAffinityMask"},
+    {0x0F,  NULL,                                       "GetThreadIdealProcessor"},
+    {0x10,  NULL,                                       "SetThreadIdealProcessor"},
+    {0x11,  NULL,                                       "GetCurrentProcessorNumber"},
+    {0x12,  NULL,                                       "Run"},
+    {0x13,  NULL,                                       "CreateMutex"},
+    {0x14,  NULL,                                       "ReleaseMutex"},
+    {0x15,  NULL,                                       "CreateSemaphore"},
+    {0x16,  NULL,                                       "ReleaseSemaphore"},
+    {0x17,  NULL,                                       "CreateEvent"},
+    {0x18,  NULL,                                       "SignalEvent"},
+    {0x19,  NULL,                                       "ClearEvent"},
+    {0x1A,  NULL,                                       "CreateTimer"},
+    {0x1B,  NULL,                                       "SetTimer"},
+    {0x1C,  NULL,                                       "CancelTimer"},
+    {0x1D,  NULL,                                       "ClearTimer"},
+    {0x1E,  NULL,                                       "CreateMemoryBlock"},
+    {0x1F,  WrapI_UUUU<MapMemoryBlock>,                 "MapMemoryBlock"},
+    {0x20,  NULL,                                       "UnmapMemoryBlock"},
+    {0x21,  WrapI_V<CreateAddressArbiter>,              "CreateAddressArbiter"},
+    {0x22,  NULL,                                       "ArbitrateAddress"},
+    {0x23,  WrapI_U<CloseHandle>,                       "CloseHandle"},
+    {0x24,  WrapI_US64<WaitSynchronization1>,           "WaitSynchronization1"},
+    {0x25,  NULL,                                       "WaitSynchronizationN"},
+    {0x26,  NULL,                                       "SignalAndWait"},
+    {0x27,  NULL,                                       "DuplicateHandle"},
+    {0x28,  NULL,                                       "GetSystemTick"},
+    {0x29,  NULL,                                       "GetHandleInfo"},
+    {0x2A,  NULL,                                       "GetSystemInfo"},
+    {0x2B,  NULL,                                       "GetProcessInfo"},
+    {0x2C,  NULL,                                       "GetThreadInfo"},
+    {0x2D,  WrapI_VC<ConnectToPort>,                    "ConnectToPort"},
+    {0x2E,  NULL,                                       "SendSyncRequest1"},
+    {0x2F,  NULL,                                       "SendSyncRequest2"},
+    {0x30,  NULL,                                       "SendSyncRequest3"},
+    {0x31,  NULL,                                       "SendSyncRequest4"},
+    {0x32,  WrapI_U<SendSyncRequest>,                   "SendSyncRequest"},
+    {0x33,  NULL,                                       "OpenProcess"},
+    {0x34,  NULL,                                       "OpenThread"},
+    {0x35,  NULL,                                       "GetProcessId"},
+    {0x36,  NULL,                                       "GetProcessIdOfThread"},
+    {0x37,  NULL,                                       "GetThreadId"},
+    {0x38,  WrapI_VU<GetResourceLimit>,                 "GetResourceLimit"},
+    {0x39,  NULL,                                       "GetResourceLimitLimitValues"},
+    {0x3A,  WrapI_VUVI<GetResourceLimitCurrentValues>,  "GetResourceLimitCurrentValues"},
+    {0x3B,  NULL,                                       "GetThreadContext"},
+    {0x3C,  NULL,                                       "Break"},
+    {0x3D,  WrapV_C<OutputDebugString>,                 "OutputDebugString"},
+    {0x3E,  NULL,                                       "ControlPerformanceCounter"},
+    {0x3F,  NULL,                                       "Unknown"},
+    {0x40,  NULL,                                       "Unknown"},
+    {0x41,  NULL,                                       "Unknown"},
+    {0x42,  NULL,                                       "Unknown"},
+    {0x43,  NULL,                                       "Unknown"},
+    {0x44,  NULL,                                       "Unknown"},
+    {0x45,  NULL,                                       "Unknown"},
+    {0x46,  NULL,                                       "Unknown"},
+    {0x47,  NULL,                                       "CreatePort"},
+    {0x48,  NULL,                                       "CreateSessionToPort"},
+    {0x49,  NULL,                                       "CreateSession"},
+    {0x4A,  NULL,                                       "AcceptSession"},
+    {0x4B,  NULL,                                       "ReplyAndReceive1"},
+    {0x4C,  NULL,                                       "ReplyAndReceive2"},
+    {0x4D,  NULL,                                       "ReplyAndReceive3"},
+    {0x4E,  NULL,                                       "ReplyAndReceive4"},
+    {0x4F,  NULL,                                       "ReplyAndReceive"},
+    {0x50,  NULL,                                       "BindInterrupt"},
+    {0x51,  NULL,                                       "UnbindInterrupt"},
+    {0x52,  NULL,                                       "InvalidateProcessDataCache"},
+    {0x53,  NULL,                                       "StoreProcessDataCache"},
+    {0x54,  NULL,                                       "FlushProcessDataCache"},
+    {0x55,  NULL,                                       "StartInterProcessDma"},
+    {0x56,  NULL,                                       "StopDma"},
+    {0x57,  NULL,                                       "GetDmaState"},
+    {0x58,  NULL,                                       "RestartDma"},
+    {0x59,  NULL,                                       "Unknown"},
+    {0x5A,  NULL,                                       "Unknown"},
+    {0x5B,  NULL,                                       "Unknown"},
+    {0x5C,  NULL,                                       "Unknown"},
+    {0x5D,  NULL,                                       "Unknown"},
+    {0x5E,  NULL,                                       "Unknown"},
+    {0x5F,  NULL,                                       "Unknown"},
+    {0x60,  NULL,                                       "DebugActiveProcess"},
+    {0x61,  NULL,                                       "BreakDebugProcess"},
+    {0x62,  NULL,                                       "TerminateDebugProcess"},
+    {0x63,  NULL,                                       "GetProcessDebugEvent"},
+    {0x64,  NULL,                                       "ContinueDebugEvent"},
+    {0x65,  NULL,                                       "GetProcessList"},
+    {0x66,  NULL,                                       "GetThreadList"},
+    {0x67,  NULL,                                       "GetDebugThreadContext"},
+    {0x68,  NULL,                                       "SetDebugThreadContext"},
+    {0x69,  NULL,                                       "QueryDebugProcessMemory"},
+    {0x6A,  NULL,                                       "ReadProcessMemory"},
+    {0x6B,  NULL,                                       "WriteProcessMemory"},
+    {0x6C,  NULL,                                       "SetHardwareBreakPoint"},
+    {0x6D,  NULL,                                       "GetDebugThreadParam"},
+    {0x6E,  NULL,                                       "Unknown"},
+    {0x6F,  NULL,                                       "Unknown"},
+    {0x70,  NULL,                                       "ControlProcessMemory"},
+    {0x71,  NULL,                                       "MapProcessMemory"},
+    {0x72,  NULL,                                       "UnmapProcessMemory"},
+    {0x73,  NULL,                                       "Unknown"},
+    {0x74,  NULL,                                       "Unknown"},
+    {0x75,  NULL,                                       "Unknown"},
+    {0x76,  NULL,                                       "TerminateProcess"},
+    {0x77,  NULL,                                       "Unknown"},
+    {0x78,  NULL,                                       "CreateResourceLimit"},
+    {0x79,  NULL,                                       "Unknown"},
+    {0x7A,  NULL,                                       "Unknown"},
+    {0x7B,  NULL,                                       "Unknown"},
+    {0x7C,  NULL,                                       "KernelSetState"},
+    {0x7D,  NULL,                                       "QueryProcessMemory"},
 };
 
 void Register() {
diff --git a/src/core/loader.cpp b/src/core/loader.cpp
index 7c1dfef61..8756588ae 100644
--- a/src/core/loader.cpp
+++ b/src/core/loader.cpp
@@ -89,8 +89,8 @@ bool Load_DAT(std::string &filename) {
         * but for the sake of making it easier... we'll temporarily/hackishly
         * allow it. No sense in making a proper reader for this.
         */
-        u32 entrypoint = 0x080c3ee0; // write to same entrypoint as elf
-        u32 payload_offset = 0x6F4;
+        u32 entrypoint = 0x00100000; // write to same entrypoint as elf
+        u32 payload_offset = 0xA150;
         
         const u8 *src = &buffer[payload_offset];
         u8 *dst = Memory::GetPointer(entrypoint);
@@ -114,6 +114,47 @@ bool Load_DAT(std::string &filename) {
     return true;
 }
 
+
+/// Loads a CTR BIN file extracted from an ExeFS
+bool Load_BIN(std::string &filename) {
+    std::string full_path = filename;
+    std::string path, file, extension;
+    SplitPath(ReplaceAll(full_path, "\\", "/"), &path, &file, &extension);
+#if EMU_PLATFORM == PLATFORM_WINDOWS
+    path = ReplaceAll(path, "/", "\\");
+#endif
+    File::IOFile f(filename, "rb");
+
+    if (f.IsOpen()) {
+        u64 size = f.GetSize();
+        u8* buffer = new u8[size];
+
+        f.ReadBytes(buffer, size);
+
+        u32 entrypoint = 0x00100000; // Hardcoded, read from exheader
+        
+        const u8 *src = buffer;
+        u8 *dst = Memory::GetPointer(entrypoint);
+        u32 srcSize = size;
+        u32 *s = (u32*)src;
+        u32 *d = (u32*)dst;
+        for (int j = 0; j < (int)(srcSize + 3) / 4; j++)
+        {
+            *d++ = (*s++);
+        }
+        
+        Core::g_app_core->SetPC(entrypoint);
+
+        delete[] buffer;
+    }
+    else {
+        return false;
+    }
+    f.Close();
+
+    return true;
+}
+
 namespace Loader {
 
 bool IsBootableDirectory() {
@@ -145,6 +186,9 @@ FileType IdentifyFile(std::string &filename) {
     else if (!strcasecmp(extension.c_str(), ".elf")) {
         return FILETYPE_CTR_ELF; // TODO(bunnei): Do some filetype checking :p
     }
+    else if (!strcasecmp(extension.c_str(), ".bin")) {
+        return FILETYPE_CTR_BIN;
+    }
     else if (!strcasecmp(extension.c_str(), ".dat")) {
         return FILETYPE_LAUNCHER_DAT;
     }
@@ -178,6 +222,9 @@ bool LoadFile(std::string &filename, std::string *error_string) {
     case FILETYPE_CTR_ELF:
         return Load_ELF(filename);
 
+    case FILETYPE_CTR_BIN:
+        return Load_BIN(filename);
+
     case FILETYPE_LAUNCHER_DAT:
         return Load_DAT(filename);
 
@@ -215,7 +262,7 @@ bool LoadFile(std::string &filename, std::string *error_string) {
     case FILETYPE_UNKNOWN:
     default:
         ERROR_LOG(LOADER, "Failed to identify file");
-        *error_string = "Failed to identify file";
+        *error_string = " Failed to identify file";
         break;
     }
     return false;
diff --git a/src/core/loader.h b/src/core/loader.h
index df30f16c7..9d4aaa874 100644
--- a/src/core/loader.h
+++ b/src/core/loader.h
@@ -17,19 +17,21 @@ enum FileType {
     FILETYPE_CTR_CIA,
     FILETYPE_CTR_CXI,
     FILETYPE_CTR_ELF,
-    FILETYPE_LAUNCHER_DAT,
-
-	FILETYPE_DIRECTORY_CXI,
-
-	FILETYPE_UNKNOWN_BIN,
-	FILETYPE_UNKNOWN_ELF,
+    FILETYPE_CTR_BIN,
 
-	FILETYPE_ARCHIVE_RAR,
-	FILETYPE_ARCHIVE_ZIP,
-
-	FILETYPE_NORMAL_DIRECTORY,
+    FILETYPE_LAUNCHER_DAT,
 
-	FILETYPE_UNKNOWN
+    FILETYPE_DIRECTORY_CXI,
+    
+    FILETYPE_UNKNOWN_BIN,
+    FILETYPE_UNKNOWN_ELF,
+    
+    FILETYPE_ARCHIVE_RAR,
+    FILETYPE_ARCHIVE_ZIP,
+    
+    FILETYPE_NORMAL_DIRECTORY,
+    
+    FILETYPE_UNKNOWN
 };
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/mem_map.cpp b/src/core/mem_map.cpp
index af99cbe32..59560b87d 100644
--- a/src/core/mem_map.cpp
+++ b/src/core/mem_map.cpp
@@ -21,6 +21,7 @@ u8* g_heap                      = NULL;         ///< Application heap (main memo
 u8* g_heap_gsp                  = NULL;         ///< GSP heap (main memory)
 u8* g_vram                      = NULL;         ///< Video memory (VRAM) pointer
 u8* g_shared_mem                = NULL;         ///< Shared memory
+u8* g_kernel_mem;                               ///< Kernel memory
 
 u8* g_physical_bootrom          = NULL;         ///< Bootrom physical memory
 u8* g_uncached_bootrom          = NULL;
@@ -30,6 +31,7 @@ u8* g_physical_fcram            = NULL;         ///< Main physical memory (FCRAM
 u8* g_physical_heap_gsp         = NULL;         ///< GSP heap physical memory
 u8* g_physical_vram             = NULL;         ///< Video physical memory (VRAM)
 u8* g_physical_shared_mem       = NULL;         ///< Physical shared memory
+u8* g_physical_kernel_mem;                      ///< Kernel memory
 
 // We don't declare the IO region in here since its handled by other means.
 static MemoryView g_views[] = {
@@ -37,6 +39,7 @@ static MemoryView g_views[] = {
     {&g_vram,       &g_physical_vram,       VRAM_VADDR,             VRAM_SIZE,          0},
     {&g_heap,       &g_physical_fcram,      HEAP_VADDR,             HEAP_SIZE,          MV_IS_PRIMARY_RAM},
     {&g_shared_mem, &g_physical_shared_mem, SHARED_MEMORY_VADDR,    SHARED_MEMORY_SIZE, 0},
+    {&g_kernel_mem, &g_physical_kernel_mem, KERNEL_MEMORY_VADDR,    KERNEL_MEMORY_SIZE, 0},
     {&g_heap_gsp,   &g_physical_heap_gsp,   HEAP_GSP_VADDR,         HEAP_GSP_SIZE,      0},
 };
 
diff --git a/src/core/mem_map.h b/src/core/mem_map.h
index 00c3b47fc..af2212a5f 100644
--- a/src/core/mem_map.h
+++ b/src/core/mem_map.h
@@ -32,10 +32,20 @@ enum {
     SHARED_MEMORY_VADDR_END = (SHARED_MEMORY_VADDR + SHARED_MEMORY_SIZE),
     SHARED_MEMORY_MASK      = (SHARED_MEMORY_SIZE - 1),
 
+    CONFIG_MEMORY_SIZE      = 0x00001000,   ///< Configuration memory size
+    CONFIG_MEMORY_VADDR     = 0x1FF80000,   ///< Configuration memory virtual address
+    CONFIG_MEMORY_VADDR_END = (CONFIG_MEMORY_VADDR + CONFIG_MEMORY_SIZE),
+    CONFIG_MEMORY_MASK      = (CONFIG_MEMORY_SIZE - 1),
+
+    KERNEL_MEMORY_SIZE      = 0x00001000,   ///< Kernel memory size
+    KERNEL_MEMORY_VADDR     = 0xFFFF0000,   ///< Kernel memory where the kthread objects etc are
+    KERNEL_MEMORY_VADDR_END = (KERNEL_MEMORY_VADDR + KERNEL_MEMORY_SIZE),
+    KERNEL_MEMORY_MASK      = (KERNEL_MEMORY_SIZE - 1),
+
     EXEFS_CODE_SIZE         = 0x03F00000,
     EXEFS_CODE_VADDR        = 0x00100000,   ///< ExeFS:/.code is loaded here
     EXEFS_CODE_VADDR_END    = (EXEFS_CODE_VADDR + EXEFS_CODE_SIZE),
-    EXEFS_CODE_MASK         = (EXEFS_CODE_VADDR - 1),
+    EXEFS_CODE_MASK         = 0x03FFFFFF,
 
     HEAP_SIZE               = FCRAM_SIZE,   ///< Application heap size
     //HEAP_PADDR              = HEAP_GSP_SIZE,
@@ -105,6 +115,7 @@ extern u8* g_heap_gsp;      ///< GSP heap (main memory)
 extern u8* g_heap;          ///< Application heap (main memory)
 extern u8* g_vram;          ///< Video memory (VRAM)
 extern u8* g_shared_mem;    ///< Shared memory
+extern u8* g_kernel_mem;    ///< Kernel memory
 extern u8* g_exefs_code;    ///< ExeFS:/.code is loaded here
 
 void Init();
diff --git a/src/core/mem_map_funcs.cpp b/src/core/mem_map_funcs.cpp
index 2284b535c..8ab647714 100644
--- a/src/core/mem_map_funcs.cpp
+++ b/src/core/mem_map_funcs.cpp
@@ -9,6 +9,7 @@
 #include "core/mem_map.h"
 #include "core/hw/hw.h"
 #include "hle/hle.h"
+#include "hle/config_mem.h"
 
 namespace Memory {
 
@@ -46,12 +47,10 @@ inline void _Read(T &var, const u32 addr) {
     // Could just do a base-relative read, too.... TODO
 
     const u32 vaddr = _VirtualAddress(addr);
-    
-    // Memory allocated for HLE use that can be addressed from the emulated application
-    // The primary use of this is sharing a commandbuffer between the HLE OS (syscore) and the LLE
-    // core running the user application (appcore)
-    if (vaddr >= HLE::CMD_BUFFER_ADDR && vaddr < HLE::CMD_BUFFER_ADDR_END) {
-        HLE::Read<T>(var, vaddr);
+
+    // Kernel memory command buffer
+    if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
+        var = *((const T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK]);
 
     // Hardware I/O register reads
     // 0x10XXXXXX- is physical address space, 0x1EXXXXXX is virtual address space
@@ -74,6 +73,10 @@ inline void _Read(T &var, const u32 addr) {
     } else if ((vaddr >= SHARED_MEMORY_VADDR)  && (vaddr < SHARED_MEMORY_VADDR_END)) {
         var = *((const T*)&g_shared_mem[vaddr & SHARED_MEMORY_MASK]);
 
+    // Config memory
+    } else if ((vaddr >= CONFIG_MEMORY_VADDR)  && (vaddr < CONFIG_MEMORY_VADDR_END)) {
+        ConfigMem::Read<T>(var, vaddr);
+
     // VRAM
     } else if ((vaddr >= VRAM_VADDR)  && (vaddr < VRAM_VADDR_END)) {
         var = *((const T*)&g_vram[vaddr & VRAM_MASK]);
@@ -87,11 +90,9 @@ template <typename T>
 inline void _Write(u32 addr, const T data) {
     u32 vaddr = _VirtualAddress(addr);
     
-    // Memory allocated for HLE use that can be addressed from the emulated application
-    // The primary use of this is sharing a commandbuffer between the HLE OS (syscore) and the LLE
-    // core running the user application (appcore)
-    if (vaddr >= HLE::CMD_BUFFER_ADDR && vaddr < HLE::CMD_BUFFER_ADDR_END) {
-        HLE::Write<T>(vaddr, data);
+    // Kernel memory command buffer
+    if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
+        *(T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK] = data;
 
     // Hardware I/O register writes
     // 0x10XXXXXX- is physical address space, 0x1EXXXXXX is virtual address space
@@ -118,12 +119,12 @@ inline void _Write(u32 addr, const T data) {
     } else if ((vaddr >= VRAM_VADDR)  && (vaddr < VRAM_VADDR_END)) {
         *(T*)&g_vram[vaddr & VRAM_MASK] = data;
 
-    } else if ((vaddr & 0xFFF00000) == 0x1FF00000) {
-        _assert_msg_(MEMMAP, false, "umimplemented write to DSP memory");
-    } else if ((vaddr & 0xFFFF0000) == 0x1FF80000) {
-        _assert_msg_(MEMMAP, false, "umimplemented write to Configuration Memory");
-    } else if ((vaddr & 0xFFFFF000) == 0x1FF81000) {
-        _assert_msg_(MEMMAP, false, "umimplemented write to shared page");
+    //} else if ((vaddr & 0xFFF00000) == 0x1FF00000) {
+    //    _assert_msg_(MEMMAP, false, "umimplemented write to DSP memory");
+    //} else if ((vaddr & 0xFFFF0000) == 0x1FF80000) {
+    //    _assert_msg_(MEMMAP, false, "umimplemented write to Configuration Memory");
+    //} else if ((vaddr & 0xFFFFF000) == 0x1FF81000) {
+    //    _assert_msg_(MEMMAP, false, "umimplemented write to shared page");
     
     // Error out...
     } else {
@@ -135,8 +136,12 @@ inline void _Write(u32 addr, const T data) {
 u8 *GetPointer(const u32 addr) {
     const u32 vaddr = _VirtualAddress(addr);
 
+    // Kernel memory command buffer
+    if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
+        return g_kernel_mem + (vaddr & KERNEL_MEMORY_MASK);
+
     // ExeFS:/.code is loaded here
-    if ((vaddr >= EXEFS_CODE_VADDR)  && (vaddr < EXEFS_CODE_VADDR_END)) {
+    } else if ((vaddr >= EXEFS_CODE_VADDR)  && (vaddr < EXEFS_CODE_VADDR_END)) {
         return g_exefs_code + (vaddr & EXEFS_CODE_MASK);
 
     // FCRAM - GSP heap