81 files changed, 4442 insertions, 3639 deletions
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index f6a52758b..69f0b35b3 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -51,6 +51,8 @@ int __cdecl main(int argc, char **argv) {
         Core::RunLoop();
     }
 
+    System::Shutdown();
+
     delete emu_window;
 
     return 0;
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index f41020f7b..ebe2e9767 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -27,7 +27,7 @@ pad_sleft =
 pad_sright =
 
 [Core]
-cpu_core = ## 0: Interpreter (default), 1: FastInterpreter (experimental)
+cpu_core = ## 0: Interpreter (default), 1: OldInterpreter (may work better, soon to be deprecated)
 gpu_refresh_rate = ## 30 (default)
 frame_skip = ## 0: No frameskip (default), 1 : 2x frameskip, 2 : 4x frameskip, etc.
 
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index 6d08d6afc..e753ea108 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -13,6 +13,7 @@
 
 #include "core/core.h"
 #include "core/settings.h"
+#include "core/system.h"
 
 #include "video_core/debug_utils/debug_utils.h"
 
@@ -89,6 +90,8 @@ void EmuThread::Stop()
         }
     }
     LOG_INFO(Frontend, "EmuThread stopped");
+
+    System::Shutdown();
 }
 
 
diff --git a/src/citra_qt/bootmanager.hxx b/src/citra_qt/bootmanager.hxx
index 5f69f15ea..1c893384c 100644
--- a/src/citra_qt/bootmanager.hxx
+++ b/src/citra_qt/bootmanager.hxx
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include <atomic>
 
 #include <QThread>
diff --git a/src/citra_qt/config/controller_config.cpp b/src/citra_qt/config/controller_config.cpp
index 52dfb627c..41000e29b 100644
--- a/src/citra_qt/config/controller_config.cpp
+++ b/src/citra_qt/config/controller_config.cpp
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include <QDialogButtonBox>
 
 #include "controller_config.hxx"
diff --git a/src/citra_qt/config/controller_config.hxx b/src/citra_qt/config/controller_config.hxx
index 0e423ee50..451593de1 100644
--- a/src/citra_qt/config/controller_config.hxx
+++ b/src/citra_qt/config/controller_config.hxx
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #ifndef _CONTROLLER_CONFIG_HXX_
 #define _CONTROLLER_CONFIG_HXX_
 
diff --git a/src/citra_qt/config/controller_config_util.cpp b/src/citra_qt/config/controller_config_util.cpp
index aee3f8616..272e8d41e 100644
--- a/src/citra_qt/config/controller_config_util.cpp
+++ b/src/citra_qt/config/controller_config_util.cpp
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include <QPushButton>
 #include <QStyle>
 #include <QGridLayout>
diff --git a/src/citra_qt/config/controller_config_util.hxx b/src/citra_qt/config/controller_config_util.hxx
index af38f126c..15e025b57 100644
--- a/src/citra_qt/config/controller_config_util.hxx
+++ b/src/citra_qt/config/controller_config_util.hxx
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #ifndef _CONTROLLER_CONFIG_UTIL_HXX_
 #define _CONTROLLER_CONFIG_UTIL_HXX_
 
diff --git a/src/citra_qt/debugger/callstack.cpp b/src/citra_qt/debugger/callstack.cpp
index a9ec2f7fe..4a47ad468 100644
--- a/src/citra_qt/debugger/callstack.cpp
+++ b/src/citra_qt/debugger/callstack.cpp
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include <QStandardItemModel>
 
 #include "callstack.hxx"
@@ -14,9 +18,9 @@ CallstackWidget::CallstackWidget(QWidget* parent): QDockWidget(parent)
 
     callstack_model = new QStandardItemModel(this);
     callstack_model->setColumnCount(4);
-    callstack_model->setHeaderData(0, Qt::Horizontal, "Stack pointer");
-    callstack_model->setHeaderData(2, Qt::Horizontal, "Return address");
-    callstack_model->setHeaderData(1, Qt::Horizontal, "Call address");
+    callstack_model->setHeaderData(0, Qt::Horizontal, "Stack Pointer");
+    callstack_model->setHeaderData(2, Qt::Horizontal, "Return Address");
+    callstack_model->setHeaderData(1, Qt::Horizontal, "Call Address");
     callstack_model->setHeaderData(3, Qt::Horizontal, "Function");
     ui.treeView->setModel(callstack_model);
 }
diff --git a/src/citra_qt/debugger/callstack.hxx b/src/citra_qt/debugger/callstack.hxx
index 680a73b6d..4f4f74823 100644
--- a/src/citra_qt/debugger/callstack.hxx
+++ b/src/citra_qt/debugger/callstack.hxx
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include <QDockWidget>
 #include "ui_callstack.h"
 
diff --git a/src/citra_qt/debugger/callstack.ui b/src/citra_qt/debugger/callstack.ui
index b3c4db632..b0e31120f 100644
--- a/src/citra_qt/debugger/callstack.ui
+++ b/src/citra_qt/debugger/callstack.ui
@@ -11,7 +11,7 @@
    </rect>
   </property>
   <property name="windowTitle">
-   <string>Call stack</string>
+   <string>Call Stack</string>
   </property>
   <widget class="QWidget" name="dockWidgetContents">
    <layout class="QVBoxLayout" name="verticalLayout">
diff --git a/src/citra_qt/debugger/disassembler.cpp b/src/citra_qt/debugger/disassembler.cpp
index 14745f3bb..636a0f187 100644
--- a/src/citra_qt/debugger/disassembler.cpp
+++ b/src/citra_qt/debugger/disassembler.cpp
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include "disassembler.hxx"
 
 #include "../bootmanager.hxx"
diff --git a/src/citra_qt/debugger/disassembler.hxx b/src/citra_qt/debugger/disassembler.hxx
index a842da956..6d3cef108 100644
--- a/src/citra_qt/debugger/disassembler.hxx
+++ b/src/citra_qt/debugger/disassembler.hxx
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include <QAbstractItemModel>
 #include <QDockWidget>
 #include "ui_disassembler.h"
diff --git a/src/citra_qt/debugger/graphics.cpp b/src/citra_qt/debugger/graphics.cpp
index 6ff4c290d..9633d367e 100644
--- a/src/citra_qt/debugger/graphics.cpp
+++ b/src/citra_qt/debugger/graphics.cpp
@@ -72,7 +72,7 @@ void GPUCommandStreamItemModel::OnGXCommandFinishedInternal(int total_command_co
 
 GPUCommandStreamWidget::GPUCommandStreamWidget(QWidget* parent) : QDockWidget(tr("Graphics Debugger"), parent)
 {
-    // TODO: set objectName!
+    setObjectName("GraphicsDebugger");
 
     GPUCommandStreamItemModel* command_model = new GPUCommandStreamItemModel(this);
     g_debugger.RegisterObserver(command_model);
diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp
index 9486f06cc..170aa736d 100644
--- a/src/citra_qt/debugger/graphics_breakpoints.cpp
+++ b/src/citra_qt/debugger/graphics_breakpoints.cpp
@@ -44,7 +44,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
                 { Pica::DebugContext::Event::CommandProcessed, tr("Pica command processed") },
                 { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") },
                 { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") },
-                { Pica::DebugContext::Event::VertexLoaded, tr("Vertex Loaded") }
+                { Pica::DebugContext::Event::VertexLoaded, tr("Vertex loaded") }
             };
 
             _dbg_assert_(Debug_GPU, map.size() == static_cast<size_t>(Pica::DebugContext::Event::NumEvents));
diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp
index 753cc25da..708b805a7 100644
--- a/src/citra_qt/debugger/graphics_cmdlists.cpp
+++ b/src/citra_qt/debugger/graphics_cmdlists.cpp
@@ -229,7 +229,7 @@ void GPUCommandListModel::OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace&
      cmd_id < PICA_REG_INDEX(reg_name) + sizeof(decltype(Pica::registers.reg_name)) / 4)
 
 void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) {
-    const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt();
+    const unsigned int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt();
     if (COMMAND_IN_RANGE(command_id, texture0) ||
         COMMAND_IN_RANGE(command_id, texture1) ||
         COMMAND_IN_RANGE(command_id, texture2)) {
@@ -255,7 +255,7 @@ void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) {
 void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) {
     QWidget* new_info_widget;
 
-    const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt();
+    const unsigned int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt();
     if (COMMAND_IN_RANGE(command_id, texture0) ||
         COMMAND_IN_RANGE(command_id, texture1) ||
         COMMAND_IN_RANGE(command_id, texture2)) {
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index dd41c3880..7ef699f37 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -10,6 +10,7 @@
 #include <QPushButton>
 #include <QSpinBox>
 
+#include "video_core/color.h"
 #include "video_core/pica.h"
 
 #include "graphics_framebuffer.hxx"
@@ -157,7 +158,7 @@ void GraphicsFramebufferWidget::OnFramebufferAddressChanged(qint64 new_value)
     }
 }
 
-void GraphicsFramebufferWidget::OnFramebufferWidthChanged(int new_value)
+void GraphicsFramebufferWidget::OnFramebufferWidthChanged(unsigned int new_value)
 {
     if (framebuffer_width != new_value) {
         framebuffer_width = new_value;
@@ -167,7 +168,7 @@ void GraphicsFramebufferWidget::OnFramebufferWidthChanged(int new_value)
     }
 }
 
-void GraphicsFramebufferWidget::OnFramebufferHeightChanged(int new_value)
+void GraphicsFramebufferWidget::OnFramebufferHeightChanged(unsigned int new_value)
 {
     if (framebuffer_height != new_value) {
         framebuffer_height = new_value;
@@ -202,7 +203,8 @@ void GraphicsFramebufferWidget::OnUpdate()
         framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
         framebuffer_width = framebuffer.GetWidth();
         framebuffer_height = framebuffer.GetHeight();
-        framebuffer_format = static_cast<Format>(framebuffer.color_format);
+        // TODO: It's unknown how this format is actually specified
+        framebuffer_format = Format::RGBA8;
 
         break;
     }
@@ -225,8 +227,8 @@ void GraphicsFramebufferWidget::OnUpdate()
     {
         QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
         u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
-        for (unsigned y = 0; y < framebuffer_height; ++y) {
-            for (unsigned x = 0; x < framebuffer_width; ++x) {
+        for (unsigned int y = 0; y < framebuffer_height; ++y) {
+            for (unsigned int x = 0; x < framebuffer_width; ++x) {
                 u32 value = *(color_buffer + x + y * framebuffer_width);
 
                 decoded_image.setPixel(x, y, qRgba((value >> 16) & 0xFF, (value >> 8) & 0xFF, value & 0xFF, 255/*value >> 24*/));
@@ -240,8 +242,8 @@ void GraphicsFramebufferWidget::OnUpdate()
     {
         QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
         u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
-        for (unsigned y = 0; y < framebuffer_height; ++y) {
-            for (unsigned x = 0; x < framebuffer_width; ++x) {
+        for (unsigned int y = 0; y < framebuffer_height; ++y) {
+            for (unsigned int x = 0; x < framebuffer_width; ++x) {
                 u8* pixel_pointer = color_buffer + x * 3 + y * 3 * framebuffer_width;
 
                 decoded_image.setPixel(x, y, qRgba(pixel_pointer[0], pixel_pointer[1], pixel_pointer[2], 255/*value >> 24*/));
@@ -255,13 +257,13 @@ void GraphicsFramebufferWidget::OnUpdate()
     {
         QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
         u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
-        for (unsigned y = 0; y < framebuffer_height; ++y) {
-            for (unsigned x = 0; x < framebuffer_width; ++x) {
+        for (unsigned int y = 0; y < framebuffer_height; ++y) {
+            for (unsigned int x = 0; x < framebuffer_width; ++x) {
                 u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2);
-                u8 r = (value >> 11) & 0x1F;
-                u8 g = (value >> 6) & 0x1F;
-                u8 b = (value >> 1) & 0x1F;
-                u8 a = value & 1;
+                u8 r = Color::Convert5To8((value >> 11) & 0x1F);
+                u8 g = Color::Convert5To8((value >> 6) & 0x1F);
+                u8 b = Color::Convert5To8((value >> 1) & 0x1F);
+                u8 a = Color::Convert1To8(value & 1);
 
                 decoded_image.setPixel(x, y, qRgba(r, g, b, 255/*a*/));
             }
diff --git a/src/citra_qt/debugger/graphics_framebuffer.hxx b/src/citra_qt/debugger/graphics_framebuffer.hxx
index 56215761e..02813525c 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.hxx
+++ b/src/citra_qt/debugger/graphics_framebuffer.hxx
@@ -62,8 +62,8 @@ public:
 public slots:
     void OnFramebufferSourceChanged(int new_value);
     void OnFramebufferAddressChanged(qint64 new_value);
-    void OnFramebufferWidthChanged(int new_value);
-    void OnFramebufferHeightChanged(int new_value);
+    void OnFramebufferWidthChanged(unsigned int new_value);
+    void OnFramebufferHeightChanged(unsigned int new_value);
     void OnFramebufferFormatChanged(int new_value);
     void OnUpdate();
 
diff --git a/src/citra_qt/debugger/ramview.cpp b/src/citra_qt/debugger/ramview.cpp
index 3f899b95e..d3ff69a61 100644
--- a/src/citra_qt/debugger/ramview.cpp
+++ b/src/citra_qt/debugger/ramview.cpp
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include "ramview.hxx"
 
 #include "common/common.h"
diff --git a/src/citra_qt/debugger/ramview.hxx b/src/citra_qt/debugger/ramview.hxx
index 1db1546aa..18423036f 100644
--- a/src/citra_qt/debugger/ramview.hxx
+++ b/src/citra_qt/debugger/ramview.hxx
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include "qhexedit.h"
 
 class GRamView : public QHexEdit
diff --git a/src/citra_qt/debugger/registers.cpp b/src/citra_qt/debugger/registers.cpp
index ed17ee4b4..f798495b2 100644
--- a/src/citra_qt/debugger/registers.cpp
+++ b/src/citra_qt/debugger/registers.cpp
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include "registers.hxx"
 
 #include "core/core.h"
diff --git a/src/citra_qt/debugger/registers.hxx b/src/citra_qt/debugger/registers.hxx
index 4cca957ce..ac8429f2b 100644
--- a/src/citra_qt/debugger/registers.hxx
+++ b/src/citra_qt/debugger/registers.hxx
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include "ui_registers.h"
 
 #include <QDockWidget>
diff --git a/src/citra_qt/debugger/registers.ui b/src/citra_qt/debugger/registers.ui
index 6537c9cd6..c81ae03f9 100644
--- a/src/citra_qt/debugger/registers.ui
+++ b/src/citra_qt/debugger/registers.ui
@@ -11,7 +11,7 @@
    </rect>
   </property>
   <property name="windowTitle">
-   <string>ARM registers</string>
+   <string>ARM Registers</string>
   </property>
   <widget class="QWidget" name="dockWidgetContents">
    <layout class="QVBoxLayout" name="verticalLayout">
diff --git a/src/citra_qt/hotkeys.cpp b/src/citra_qt/hotkeys.cpp
index 5d0b52e4f..08be7ff74 100644
--- a/src/citra_qt/hotkeys.cpp
+++ b/src/citra_qt/hotkeys.cpp
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include <QKeySequence>
 #include <QSettings>
 #include "hotkeys.hxx"
diff --git a/src/citra_qt/hotkeys.hxx b/src/citra_qt/hotkeys.hxx
index 66ef7bb4e..75c7cc625 100644
--- a/src/citra_qt/hotkeys.hxx
+++ b/src/citra_qt/hotkeys.hxx
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include <QShortcut>
 #include <QDialog>
 #include "ui_hotkeys.h"
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index b12e6a02b..64e389f25 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include <thread>
 
 #include <QtGui>
@@ -107,7 +111,7 @@ GMainWindow::GMainWindow()
     restoreState(settings.value("state").toByteArray());
     render_window->restoreGeometry(settings.value("geometryRenderWindow").toByteArray());
 
-    ui.action_Popout_Window_Mode->setChecked(settings.value("popoutWindowMode", true).toBool());
+    ui.action_Single_Window_Mode->setChecked(settings.value("singleWindowMode", true).toBool());
     ToggleWindowMode();
 
     // Setup connections
@@ -116,7 +120,7 @@ GMainWindow::GMainWindow()
     connect(ui.action_Start, SIGNAL(triggered()), this, SLOT(OnStartGame()));
     connect(ui.action_Pause, SIGNAL(triggered()), this, SLOT(OnPauseGame()));
     connect(ui.action_Stop, SIGNAL(triggered()), this, SLOT(OnStopGame()));
-    connect(ui.action_Popout_Window_Mode, SIGNAL(triggered(bool)), this, SLOT(ToggleWindowMode()));
+    connect(ui.action_Single_Window_Mode, SIGNAL(triggered(bool)), this, SLOT(ToggleWindowMode()));
     connect(ui.action_Hotkeys, SIGNAL(triggered()), this, SLOT(OnOpenHotkeysDialog()));
 
     // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views before the CPU continues
@@ -175,13 +179,13 @@ void GMainWindow::BootGame(std::string filename)
 
 void GMainWindow::OnMenuLoadFile()
 {
-    QString filename = QFileDialog::getOpenFileName(this, tr("Load file"), QString(), tr("3DS executable (*.3dsx *.elf *.axf *.bin *.cci *.cxi)"));
+    QString filename = QFileDialog::getOpenFileName(this, tr("Load File"), QString(), tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.bin *.cci *.cxi)"));
     if (filename.size())
        BootGame(filename.toLatin1().data());
 }
 
 void GMainWindow::OnMenuLoadSymbolMap() {
-    QString filename = QFileDialog::getOpenFileName(this, tr("Load symbol map"), QString(), tr("Symbol map (*)"));
+    QString filename = QFileDialog::getOpenFileName(this, tr("Load Symbol Map"), QString(), tr("Symbol map (*)"));
     if (filename.size())
         LoadSymbolMap(filename.toLatin1().data());
 }
@@ -223,8 +227,8 @@ void GMainWindow::OnOpenHotkeysDialog()
 
 void GMainWindow::ToggleWindowMode()
 {
-    bool enable = ui.action_Popout_Window_Mode->isChecked();
-    if (enable && render_window->parent() != nullptr)
+    bool enable = ui.action_Single_Window_Mode->isChecked();
+    if (!enable && render_window->parent() != nullptr)
     {
         ui.horizontalLayout->removeWidget(render_window);
         render_window->setParent(nullptr);
@@ -232,7 +236,7 @@ void GMainWindow::ToggleWindowMode()
         render_window->RestoreGeometry();
         render_window->setFocusPolicy(Qt::NoFocus);
     }
-    else if (!enable && render_window->parent() == nullptr)
+    else if (enable && render_window->parent() == nullptr)
     {
         render_window->BackupGeometry();
         ui.horizontalLayout->addWidget(render_window);
@@ -254,7 +258,7 @@ void GMainWindow::closeEvent(QCloseEvent* event)
     settings.setValue("geometry", saveGeometry());
     settings.setValue("state", saveState());
     settings.setValue("geometryRenderWindow", render_window->saveGeometry());
-    settings.setValue("popoutWindowMode", ui.action_Popout_Window_Mode->isChecked());
+    settings.setValue("singleWindowMode", ui.action_Single_Window_Mode->isChecked());
     settings.setValue("firstStart", false);
     SaveHotkeys(settings);
 
diff --git a/src/citra_qt/main.hxx b/src/citra_qt/main.hxx
index b1b40df46..72df17c50 100644
--- a/src/citra_qt/main.hxx
+++ b/src/citra_qt/main.hxx
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #ifndef _CITRA_QT_MAIN_HXX_
 #define _CITRA_QT_MAIN_HXX_
 
diff --git a/src/citra_qt/main.ui b/src/citra_qt/main.ui
index f3596716f..d06c207a0 100644
--- a/src/citra_qt/main.ui
+++ b/src/citra_qt/main.ui
@@ -58,7 +58,7 @@
     <property name="title">
      <string>&amp;View</string>
     </property>
-    <addaction name="action_Popout_Window_Mode"/>
+    <addaction name="action_Single_Window_Mode"/>
     <addaction name="action_Hotkeys"/>
    </widget>
    <widget class="QMenu" name="menu_Help">
@@ -75,12 +75,12 @@
   <widget class="QStatusBar" name="statusbar"/>
    <action name="action_Load_File">
      <property name="text">
-       <string>Load file...</string>
+       <string>Load File...</string>
      </property>
    </action>
    <action name="action_Load_Symbol_Map">
      <property name="text">
-       <string>Load symbol map...</string>
+       <string>Load Symbol Map...</string>
      </property>
    </action>
    <action name="action_Exit">
@@ -114,12 +114,12 @@
        <string>About Citra</string>
      </property>
    </action>
-   <action name="action_Popout_Window_Mode">
+   <action name="action_Single_Window_Mode">
      <property name="checkable">
        <bool>true</bool>
      </property>
      <property name="text">
-       <string>Popout window</string>
+       <string>Single Window Mode</string>
      </property>
    </action>
    <action name="action_Hotkeys">
diff --git a/src/citra_qt/version.h b/src/citra_qt/version.h
index 07022de5c..9d5a2b1a2 100644
--- a/src/citra_qt/version.h
+++ b/src/citra_qt/version.h
@@ -1,3 +1,7 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 // TODO: Supposed to be generated...
 // GENERATED - Do not edit!
 #ifndef VERSION_H_
diff --git a/src/common/common_paths.h b/src/common/common_paths.h
index d3f0702bc..e692e5492 100644
--- a/src/common/common_paths.h
+++ b/src/common/common_paths.h
@@ -42,6 +42,7 @@
 #define SDMC_DIR          "sdmc"
 #define EXTSAVEDATA_DIR   "extsavedata"
 #define SAVEDATA_DIR      "savedata"
+#define SAVEDATACHECK_DIR "savedatacheck"
 #define SYSDATA_DIR       "sysdata"
 #define SYSSAVEDATA_DIR   "syssavedata"
 #define SHADERCACHE_DIR   "shader_cache"
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index c44ad4ca1..0a6cd80c8 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -678,6 +678,7 @@ const std::string& GetUserPath(const unsigned int DirIDX, const std::string &new
         paths[D_SDMC_IDX]           = paths[D_USER_IDX] + SDMC_DIR DIR_SEP;
         paths[D_EXTSAVEDATA]        = paths[D_USER_IDX] + EXTSAVEDATA_DIR DIR_SEP;
         paths[D_SAVEDATA_IDX]       = paths[D_USER_IDX] + SAVEDATA_DIR DIR_SEP;
+        paths[D_SAVEDATACHECK_IDX]  = paths[D_USER_IDX] + SAVEDATACHECK_DIR DIR_SEP;
         paths[D_SYSDATA_IDX]        = paths[D_USER_IDX] + SYSDATA_DIR DIR_SEP;
         paths[D_SYSSAVEDATA_IDX]    = paths[D_USER_IDX] + SYSSAVEDATA_DIR DIR_SEP;
         paths[D_SHADERCACHE_IDX]    = paths[D_USER_IDX] + SHADERCACHE_DIR DIR_SEP;
@@ -723,6 +724,7 @@ const std::string& GetUserPath(const unsigned int DirIDX, const std::string &new
             paths[D_SDMC_IDX]           = paths[D_USER_IDX] + SDMC_DIR DIR_SEP;
             paths[D_EXTSAVEDATA]        = paths[D_USER_IDX] + EXTSAVEDATA_DIR DIR_SEP;
             paths[D_SAVEDATA_IDX]       = paths[D_USER_IDX] + SAVEDATA_DIR DIR_SEP;
+            paths[D_SAVEDATACHECK_IDX]  = paths[D_USER_IDX] + SAVEDATACHECK_DIR DIR_SEP;
             paths[D_SYSSAVEDATA_IDX]    = paths[D_USER_IDX] + SYSSAVEDATA_DIR DIR_SEP;
             paths[D_SHADERCACHE_IDX]    = paths[D_USER_IDX] + SHADERCACHE_DIR DIR_SEP;
             paths[D_SHADERS_IDX]        = paths[D_USER_IDX] + SHADERS_DIR DIR_SEP;
diff --git a/src/common/file_util.h b/src/common/file_util.h
index ec2415473..c83ecd87d 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -29,6 +29,7 @@ enum {
     D_SDMC_IDX,
     D_EXTSAVEDATA,
     D_SAVEDATA_IDX,
+    D_SAVEDATACHECK_IDX,
     D_SYSDATA_IDX,
     D_SYSSAVEDATA_IDX,
     D_HIRESTEXTURES_IDX,
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 89ea70d23..b67226d8d 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -20,11 +20,11 @@ set(SRCS
             file_sys/archive_extsavedata.cpp
             file_sys/archive_romfs.cpp
             file_sys/archive_savedata.cpp
+            file_sys/archive_savedatacheck.cpp
             file_sys/archive_sdmc.cpp
             file_sys/archive_systemsavedata.cpp
             file_sys/disk_archive.cpp
-            file_sys/file_romfs.cpp
-            file_sys/directory_romfs.cpp
+            file_sys/ivfc_archive.cpp
             hle/kernel/address_arbiter.cpp
             hle/kernel/event.cpp
             hle/kernel/kernel.cpp
@@ -66,6 +66,7 @@ set(SRCS
             hle/service/soc_u.cpp
             hle/service/srv.cpp
             hle/service/ssl_c.cpp
+            hle/service/y2r_u.cpp
             hle/config_mem.cpp
             hle/hle.cpp
             hle/svc.cpp
@@ -108,13 +109,13 @@ set(HEADERS
             file_sys/archive_extsavedata.h
             file_sys/archive_romfs.h
             file_sys/archive_savedata.h
+            file_sys/archive_savedatacheck.h
             file_sys/archive_sdmc.h
             file_sys/archive_systemsavedata.h
             file_sys/disk_archive.h
             file_sys/file_backend.h
-            file_sys/file_romfs.h
+            file_sys/ivfc_archive.h
             file_sys/directory_backend.h
-            file_sys/directory_romfs.h
             hle/kernel/address_arbiter.h
             hle/kernel/event.h
             hle/kernel/kernel.h
@@ -157,6 +158,7 @@ set(HEADERS
             hle/service/soc_u.h
             hle/service/srv.h
             hle/service/ssl_c.h
+            hle/service/y2r_u.h
             hle/config_mem.h
             hle/result.h
             hle/function_wrappers.h
diff --git a/src/core/arm/dyncom/arm_dyncom_dec.cpp b/src/core/arm/dyncom/arm_dyncom_dec.cpp
index 333b40f54..0927eece1 100644
--- a/src/core/arm/dyncom/arm_dyncom_dec.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_dec.cpp
@@ -136,7 +136,6 @@ const ISEITEM arm_instruction[] = {
     { "pkhbt", 2, 6, 20, 27, 0x00000068, 4, 6, 0x00000001 },
     { "smul", 3, 4, 20, 27, 0x00000016, 7, 7, 0x00000001, 4, 4, 0x00000000 },
     { "smlalxy", 3, 4, 20, 27, 0x00000014, 7, 7, 0x00000001, 4, 4, 0x00000000 },
-    //	{"smlal"	,  2	,  4	, 21, 27, 0x00000007,  4,  7, 0x00000009},
     { "smla", 3, 4, 20, 27, 0x00000010, 7, 7, 0x00000001, 4, 4, 0x00000000 },
     { "mcrr", 1, 6, 20, 27, 0x000000c4 },
     { "mrrc", 1, 6, 20, 27, 0x000000c5 },
@@ -194,6 +193,10 @@ const ISEITEM arm_instruction[] = {
     { "ldc", 2, 0, 25, 27, 0x00000006, 20, 20, 0x00000001 },
     { "swi", 1, 0, 24, 27, 0x0000000f },
     { "bbl", 1, 0, 25, 27, 0x00000005 },
+    { "ldrexd", 2, ARMV6K, 20, 27, 0x0000001B, 4, 7, 0x00000009 },
+    { "strexd", 2, ARMV6K, 20, 27, 0x0000001A, 4, 7, 0x00000009 },
+    { "ldrexh", 2, ARMV6K, 20, 27, 0x0000001F, 4, 7, 0x00000009 },
+    { "strexh", 2, ARMV6K, 20, 27, 0x0000001E, 4, 7, 0x00000009 },
 };
 
 const ISEITEM arm_exclusion_code[] = {
@@ -383,6 +386,11 @@ const ISEITEM arm_exclusion_code[] = {
     { "ldc", 0, 0, 0 },
     { "swi", 0, 0, 0 },
     { "bbl", 0, 0, 0 },
+    { "ldrexd", 0, ARMV6K, 0 },
+    { "strexd", 0, ARMV6K, 0 },
+    { "ldrexh", 0, ARMV6K, 0 },
+    { "strexh", 0, ARMV6K, 0 },
+
     { "bl_1_thumb", 0, INVALID, 0 },    // Should be table[-4]
     { "bl_2_thumb", 0, INVALID, 0 },    // Should be located at the end of the table[-3]
     { "blx_1_thumb", 0, INVALID, 0 },   // Should be located at table[-2]
@@ -395,6 +403,7 @@ int decode_arm_instr(uint32_t instr, int32_t *idx) {
     int ret = DECODE_FAILURE;
     int i = 0;
     int instr_slots = sizeof(arm_instruction) / sizeof(ISEITEM);
+
     for (i = 0; i < instr_slots; i++) {
         n = arm_instruction[i].attribute_value;
         base = 0;
diff --git a/src/core/arm/dyncom/arm_dyncom_dec.h b/src/core/arm/dyncom/arm_dyncom_dec.h
index 70eb96e93..58784aeea 100644
--- a/src/core/arm/dyncom/arm_dyncom_dec.h
+++ b/src/core/arm/dyncom/arm_dyncom_dec.h
@@ -1,153 +1,117 @@
-/* Copyright (C) 
-* 2012 - Michael.Kang blackfin.kang@gmail.com
-* This program is free software; you can redistribute it and/or
-* modify it under the terms of the GNU General Public License
-* as published by the Free Software Foundation; either version 2
-* of the License, or (at your option) any later version.
-* 
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-* GNU General Public License for more details.
-* 
-* You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-* 
-*/
-
-/**
-* @file arm_dyncom_dec.h
-* @brief Some common utility for arm instruction decoder
-* @author Michael.Kang blackfin.kang@gmail.com
-* @version 7849
-* @date 2012-03-15
-*/
-
-#ifndef __ARM_DYNCOM_DEC__
-#define __ARM_DYNCOM_DEC__
-
-#define BITS(a,b) ((instr >> (a)) & ((1 << (1+(b)-(a)))-1))
-#define BIT(n) ((instr >> (n)) & 1)
-#define BAD	do{printf("meet BAD at %s, instr is %x\n", __FUNCTION__, instr ); /*exit(0);*/}while(0);
-#define ptr_N	cpu->ptr_N
-#define ptr_Z	cpu->ptr_Z
-#define ptr_C	cpu->ptr_C
-#define ptr_V	cpu->ptr_V
-#define ptr_I 	cpu->ptr_I
-#define ptr_T 	cpu->ptr_T
-#define	ptr_CPSR cpu->ptr_gpr[16]
-
-/* for MUL instructions */
-/*xxxx xxxx xxxx 1111 xxxx xxxx xxxx xxxx */
-#define RDHi ((instr >> 16) & 0xF)
-/*xxxx xxxx xxxx xxxx 1111 xxxx xxxx xxxx */
-#define RDLo ((instr >> 12) & 0xF)
-/*xxxx xxxx xxxx 1111 xxxx xxxx xxxx xxxx */
-#define MUL_RD ((instr >> 16) & 0xF)
-/*xxxx xxxx xxxx xxxx 1111 xxxx xxxx xxxx */
-#define MUL_RN ((instr >> 12) & 0xF)
-/*xxxx xxxx xxxx xxxx xxxx 1111 xxxx xxxx */
-#define RS ((instr >> 8) & 0xF)
-
-/*xxxx xxxx xxxx xxxx 1111 xxxx xxxx xxxx */
-#define RD ((instr >> 12) & 0xF)
-/*xxxx xxxx xxxx 1111 xxxx xxxx xxxx xxxx */
-#define RN ((instr >> 16) & 0xF)
-/*xxxx xxxx xxxx xxxx xxxx xxxx xxxx 1111 */
-#define RM (instr & 0xF)
-
-/* CP15 registers */
-#define OPCODE_1        BITS(21, 23)
-#define CRn             BITS(16, 19)
-#define CRm             BITS(0, 3)
-#define OPCODE_2        BITS(5, 7)
-
-/*xxxx xx1x xxxx xxxx xxxx xxxx xxxx xxxx */
-#define I BIT(25)
-/*xxxx xxxx xxx1 xxxx xxxx xxxx xxxx xxxx */
-#define S BIT(20)
-
-#define SHIFT BITS(5,6)
-#define SHIFT_IMM BITS(7,11)
-#define IMMH BITS(8,11)
-#define IMML BITS(0,3)
-
-#define LSPBIT  BIT(24)
-#define LSUBIT  BIT(23)
-#define LSBBIT  BIT(22)
-#define LSWBIT  BIT(21)
-#define LSLBIT  BIT(20)
-#define LSSHBITS BITS(5,6)
-#define OFFSET12 BITS(0,11)
-#define SBIT  BIT(20)
-#define DESTReg (BITS (12, 15))
-
-/* they are in unused state, give a corrent value when using */
+// Copyright 2012 Michael Kang, 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#define BITS(a,b)   ((instr >> (a)) & ((1 << (1+(b)-(a)))-1))
+#define BIT(n)      ((instr >> (n)) & 1)
+#define BAD         do { printf("meet BAD at %s, instr is %x\n", __FUNCTION__, instr ); } while(0);
+#define ptr_N       cpu->ptr_N
+#define ptr_Z       cpu->ptr_Z
+#define ptr_C       cpu->ptr_C
+#define ptr_V       cpu->ptr_V
+#define ptr_I       cpu->ptr_I
+#define ptr_T       cpu->ptr_T
+#define ptr_CPSR    cpu->ptr_gpr[16]
+
+// For MUL instructions
+#define RDHi        ((instr >> 16) & 0xF)
+#define RDLo        ((instr >> 12) & 0xF)
+#define MUL_RD      ((instr >> 16) & 0xF)
+#define MUL_RN      ((instr >> 12) & 0xF)
+#define RS          ((instr >> 8) & 0xF)
+#define RD          ((instr >> 12) & 0xF)
+#define RN          ((instr >> 16) & 0xF)
+#define RM          (instr & 0xF)
+
+// CP15 registers
+#define OPCODE_1    BITS(21, 23)
+#define CRn         BITS(16, 19)
+#define CRm         BITS(0, 3)
+#define OPCODE_2    BITS(5, 7)
+
+#define I           BIT(25)
+#define S           BIT(20)
+
+#define             SHIFT BITS(5,6)
+#define             SHIFT_IMM BITS(7,11)
+#define             IMMH BITS(8,11)
+#define             IMML BITS(0,3)
+
+#define LSPBIT      BIT(24)
+#define LSUBIT      BIT(23)
+#define LSBBIT      BIT(22)
+#define LSWBIT      BIT(21)
+#define LSLBIT      BIT(20)
+#define LSSHBITS    BITS(5,6)
+#define OFFSET12    BITS(0,11)
+#define SBIT        BIT(20)
+#define DESTReg     (BITS (12, 15))
+
+// These are currently unused; give them a correct value before using them
 #define IS_V5E 0
 #define IS_V5  0
 #define IS_V6  0
 #define LHSReg 0
 
-/* temp define the using the pc reg need implement a flow */
-#define STORE_CHECK_RD_PC	ADD(R(RD), CONST(INSTR_SIZE * 2))
+// Temporary definition: use of the PC register here still needs a proper flow to be implemented
+#define STORE_CHECK_RD_PC   ADD(R(RD), CONST(INSTR_SIZE * 2))
 
-#define OPERAND operand(cpu,instr,bb,NULL)
-#define SCO_OPERAND(sco) operand(cpu,instr,bb,sco)
-#define BOPERAND boperand(instr)
+#define OPERAND             operand(cpu,instr,bb,NULL)
+#define SCO_OPERAND(sco)    operand(cpu,instr,bb,sco)
+#define BOPERAND            boperand(instr)
 
-#define CHECK_RN_PC  (RN==15? ADD(AND(R(RN), CONST(~0x1)), CONST(INSTR_SIZE * 2)):R(RN))
-#define CHECK_RN_PC_WA  (RN==15? ADD(AND(R(RN), CONST(~0x3)), CONST(INSTR_SIZE * 2)):R(RN))
+#define CHECK_RN_PC         (RN == 15 ? ADD(AND(R(RN), CONST(~0x1)), CONST(INSTR_SIZE * 2)) : R(RN))
+#define CHECK_RN_PC_WA      (RN == 15 ? ADD(AND(R(RN), CONST(~0x3)), CONST(INSTR_SIZE * 2)) : R(RN))
 
-#define GET_USER_MODE() (OR(ICMP_EQ(R(MODE_REG), CONST(USER32MODE)), ICMP_EQ(R(MODE_REG), CONST(SYSTEM32MODE))))
+#define GET_USER_MODE()     (OR(ICMP_EQ(R(MODE_REG), CONST(USER32MODE)), ICMP_EQ(R(MODE_REG), CONST(SYSTEM32MODE))))
 
 int decode_arm_instr(uint32_t instr, int32_t *idx);
 
 enum DECODE_STATUS {
-	DECODE_SUCCESS,
-	DECODE_FAILURE
+    DECODE_SUCCESS,
+    DECODE_FAILURE
 };
 
 struct instruction_set_encoding_item {
-        const char *name;
-        int attribute_value;
-        int version;
-        u32 content[21];
+    const char *name;
+    int attribute_value;
+    int version;
+    u32 content[21];
 };
 
 typedef struct instruction_set_encoding_item ISEITEM;
 
-#define RECORD_WB(value, flag) {cpu->dyncom_engine->wb_value = value;cpu->dyncom_engine->wb_flag = flag;}
+#define RECORD_WB(value, flag) { cpu->dyncom_engine->wb_value = value;cpu->dyncom_engine->wb_flag = flag; }
 #define INIT_WB(wb_value, wb_flag) RECORD_WB(wb_value, wb_flag)
 
-#define EXECUTE_WB(base_reg)		{if(cpu->dyncom_engine->wb_flag) \
-                                               LET(base_reg, cpu->dyncom_engine->wb_value);}
-inline int get_reg_count(uint32_t instr){
-	int i =  BITS(0,15);
-	int count = 0;
-	while(i){
-		if(i & 1)
-			count ++;
-		i = i >> 1;
-	}
-	return count;
+#define EXECUTE_WB(base_reg) { if(cpu->dyncom_engine->wb_flag) LET(base_reg, cpu->dyncom_engine->wb_value); }
+
+inline int get_reg_count(uint32_t instr) {
+    int i = BITS(0, 15);
+    int count = 0;
+    while (i) {
+        if (i & 1)
+            count++;
+        i = i >> 1;
+    }
+    return count;
 }
 
 enum ARMVER {
-	INVALID = 0,
-        ARMALL,
-        ARMV4,
-        ARMV4T,
-        ARMV5T,
-        ARMV5TE,
-        ARMV5TEJ,
-        ARMV6,
-	ARM1176JZF_S,
-        ARMVFP2,
-        ARMVFP3
+    INVALID = 0,
+    ARMALL,
+    ARMV4,
+    ARMV4T,
+    ARMV5T,
+    ARMV5TE,
+    ARMV5TEJ,
+    ARMV6,
+    ARM1176JZF_S,
+    ARMVFP2,
+    ARMVFP3,
+    ARMV6K,
 };
 
-//extern const INSTRACT arm_instruction_action[];
 extern const ISEITEM arm_instruction[];
-
-#endif
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index c61ae0053..593e0eabd 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -622,9 +622,7 @@ void LdnStM(DecrementAfter)(arm_processor *cpu, unsigned int inst, unsigned int
     }
     unsigned int rn = CHECK_READ_REG15_WA(cpu, Rn);
     unsigned int start_addr = rn - count * 4 + 4;
-    unsigned int end_addr   = rn;
 
-    virt_addr = end_addr;
     virt_addr = start_addr;
 
     if (CondPassed(cpu, BITS(inst, 28, 31)) && BIT(inst, 21)) {
@@ -873,6 +871,8 @@ typedef struct _mvn_inst {
 typedef struct _rev_inst {
     unsigned int Rd;
     unsigned int Rm;
+    unsigned int op1;
+    unsigned int op2;
 } rev_inst;
 
 typedef struct _rsb_inst {
@@ -930,6 +930,8 @@ typedef struct _smlad_inst {
     unsigned int Rd;
     unsigned int Ra;
     unsigned int Rn;
+    unsigned int op1;
+    unsigned int op2;
 } smlad_inst;
 
 typedef struct _smla_inst {
@@ -972,6 +974,16 @@ typedef struct _smlal_inst {
     unsigned int RdLo;
 } smlal_inst;
 
+typedef struct smlald_inst {
+    unsigned int RdLo;
+    unsigned int RdHi;
+    unsigned int Rm;
+    unsigned int Rn;
+    unsigned int swap;
+    unsigned int op1;
+    unsigned int op2;
+} smlald_inst;
+
 typedef struct _mla_inst {
     unsigned int S;
     unsigned int Rn;
@@ -1067,7 +1079,7 @@ typedef struct _cdp_inst {
     unsigned int cp_num;
     unsigned int opcode_2;
     unsigned int CRm;
-    uint32 inst;
+    unsigned int inst;
 }cdp_inst;
 
 typedef struct _uxtb_inst {
@@ -1102,10 +1114,10 @@ typedef struct _blx_1_thumb {
 }blx_1_thumb;
 
 typedef struct _pkh_inst {
-    u32 Rm;
-    u32 Rn;
-    u32 Rd;
-    u8 imm;
+    unsigned int Rm;
+    unsigned int Rn;
+    unsigned int Rd;
+    unsigned char imm;
 } pkh_inst;
 
 typedef arm_inst * ARM_INST_PTR;
@@ -1407,15 +1419,19 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(bx)(unsigned int inst, int index)
     arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(bx_inst));
     bx_inst *inst_cream = (bx_inst *)inst_base->component;
 
-    inst_base->cond  = BITS(inst, 28, 31);
-    inst_base->idx     = index;
-    inst_base->br     = INDIRECT_BRANCH;
+    inst_base->cond = BITS(inst, 28, 31);
+    inst_base->idx  = index;
+    inst_base->br   = INDIRECT_BRANCH;
 
-    inst_cream->Rm     = BITS(inst, 0, 3);
+    inst_cream->Rm  = BITS(inst, 0, 3);
 
     return inst_base;
 }
-ARM_INST_PTR INTERPRETER_TRANSLATE(bxj)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("BXJ"); }
+ARM_INST_PTR INTERPRETER_TRANSLATE(bxj)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(bx)(inst, index);
+}
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(cdp)(unsigned int inst, int index){
     arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(cdp_inst));
     cdp_inst *inst_cream = (cdp_inst *)inst_base->component;
@@ -1738,40 +1754,31 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(ldrd)(unsigned int inst, int index)
 
     return inst_base;
 }
-
 ARM_INST_PTR INTERPRETER_TRANSLATE(ldrex)(unsigned int inst, int index)
 {
-    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
-    ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst *inst_cream = (generic_arm_inst *)inst_base->component;
 
     inst_base->cond = BITS(inst, 28, 31);
-    inst_base->idx     = index;
-    inst_base->br     = NON_BRANCH;
+    inst_base->idx  = index;
+    inst_base->br   = (BITS(inst, 12, 15) == 15) ? INDIRECT_BRANCH : NON_BRANCH; // Branch if dest is R15
 
-    inst_cream->inst = inst;
-    //inst_cream->get_addr = get_calc_addr_op(inst);
+    inst_cream->Rn = BITS(inst, 16, 19);
+    inst_cream->Rd = BITS(inst, 12, 15);
 
-    if (BITS(inst, 12, 15) == 15) {
-        inst_base->br = INDIRECT_BRANCH;
-    }
     return inst_base;
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(ldrexb)(unsigned int inst, int index)
 {
-    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
-    ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
-
-    inst_base->cond = BITS(inst, 28, 31);
-    inst_base->idx     = index;
-    inst_base->br     = NON_BRANCH;
-
-    inst_cream->inst = inst;
-    inst_cream->get_addr = get_calc_addr_op(inst);
-
-    if (BITS(inst, 12, 15) == 15) {
-        inst_base->br = INDIRECT_BRANCH;
-    }
-    return inst_base;
+    return INTERPRETER_TRANSLATE(ldrex)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(ldrexh)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(ldrex)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(ldrexd)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(ldrex)(inst, index);
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(ldrh)(unsigned int inst, int index)
 {
@@ -2050,7 +2057,37 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(pld)(unsigned int inst, int index)
 
     return inst_base;
 }
-ARM_INST_PTR INTERPRETER_TRANSLATE(qadd)(unsigned int inst, int index)     { UNIMPLEMENTED_INSTRUCTION("QADD"); }
+
+ARM_INST_PTR INTERPRETER_TRANSLATE(qadd)(unsigned int inst, int index)
+{
+    arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+
+    inst_base->cond     = BITS(inst, 28, 31);
+    inst_base->idx      = index;
+    inst_base->br       = NON_BRANCH;
+    inst_base->load_r15 = 0;
+
+    inst_cream->op1 = BITS(inst, 21, 22);
+    inst_cream->Rm  = BITS(inst, 0, 3);
+    inst_cream->Rn  = BITS(inst, 16, 19);
+    inst_cream->Rd  = BITS(inst, 12, 15);
+
+    return inst_base;
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(qdadd)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(qadd)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(qdsub)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(qadd)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(qsub)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(qadd)(inst, index);
+}
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(qadd8)(unsigned int inst, int index)
 {
     arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
@@ -2077,9 +2114,6 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(qaddsubx)(unsigned int inst, int index)
 {
     return INTERPRETER_TRANSLATE(qadd8)(inst, index);
 }
-ARM_INST_PTR INTERPRETER_TRANSLATE(qdadd)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("QDADD"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(qdsub)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("QDSUB"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(qsub)(unsigned int inst, int index)     { UNIMPLEMENTED_INSTRUCTION("QSUB"); }
 ARM_INST_PTR INTERPRETER_TRANSLATE(qsub8)(unsigned int inst, int index)
 {
     return INTERPRETER_TRANSLATE(qadd8)(inst, index);
@@ -2092,36 +2126,33 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(qsubaddx)(unsigned int inst, int index)
 {
     return INTERPRETER_TRANSLATE(qadd8)(inst, index);
 }
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(rev)(unsigned int inst, int index)
 {
-    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(rev_inst));
-    rev_inst *inst_cream = (rev_inst *)inst_base->component;
+    arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(rev_inst));
+    rev_inst* const inst_cream = (rev_inst*)inst_base->component;
 
-    inst_base->cond  = BITS(inst, 28, 31);
-    inst_base->idx     = index;
-    inst_base->br     = NON_BRANCH;
+    inst_base->cond     = BITS(inst, 28, 31);
+    inst_base->idx      = index;
+    inst_base->br       = NON_BRANCH;
     inst_base->load_r15 = 0;
 
-    inst_cream->Rm   = BITS(inst,  0,  3);
-    inst_cream->Rd   = BITS(inst, 12, 15);
+    inst_cream->Rm  = BITS(inst,  0,  3);
+    inst_cream->Rd  = BITS(inst, 12, 15);
+    inst_cream->op1 = BITS(inst, 20, 22);
+    inst_cream->op2 = BITS(inst, 5, 7);
 
     return inst_base;
 }
-ARM_INST_PTR INTERPRETER_TRANSLATE(rev16)(unsigned int inst, int index){
-    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(rev_inst));
-    rev_inst *inst_cream = (rev_inst *)inst_base->component;
-
-    inst_base->cond  = BITS(inst, 28, 31);
-    inst_base->idx     = index;
-    inst_base->br     = NON_BRANCH;
-    inst_base->load_r15 = 0;
-
-    inst_cream->Rm   = BITS(inst,  0,  3);
-    inst_cream->Rd   = BITS(inst, 12, 15);
-
-    return inst_base;
+ARM_INST_PTR INTERPRETER_TRANSLATE(rev16)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(rev)(inst, index);
 }
-ARM_INST_PTR INTERPRETER_TRANSLATE(revsh)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("REVSH"); }
+ARM_INST_PTR INTERPRETER_TRANSLATE(revsh)(unsigned int inst, int index)
+{
+     return INTERPRETER_TRANSLATE(rev)(inst, index);
+}
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(rfe)(unsigned int inst, int index)   { UNIMPLEMENTED_INSTRUCTION("RFE"); }
 ARM_INST_PTR INTERPRETER_TRANSLATE(rsb)(unsigned int inst, int index)
 {
@@ -2171,29 +2202,45 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(rsc)(unsigned int inst, int index)
     }
     return inst_base;
 }
-ARM_INST_PTR INTERPRETER_TRANSLATE(sadd8)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("SADD8"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(sadd16)(unsigned int inst, int index)
+ARM_INST_PTR INTERPRETER_TRANSLATE(sadd8)(unsigned int inst, int index)
 {
     arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
     generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
-    
+
     inst_base->cond     = BITS(inst, 28, 31);
     inst_base->idx      = index;
     inst_base->br       = NON_BRANCH;
     inst_base->load_r15 = 0;
-    
+
     inst_cream->Rm  = BITS(inst, 0, 3);
     inst_cream->Rn  = BITS(inst, 16, 19);
     inst_cream->Rd  = BITS(inst, 12, 15);
     inst_cream->op1 = BITS(inst, 20, 21);
     inst_cream->op2 = BITS(inst, 5, 7);
-    
+
     return inst_base;
 }
+ARM_INST_PTR INTERPRETER_TRANSLATE(sadd16)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(sadd8)(inst, index);
+}
 ARM_INST_PTR INTERPRETER_TRANSLATE(saddsubx)(unsigned int inst, int index)
 {
-    return INTERPRETER_TRANSLATE(sadd16)(inst, index);
+    return INTERPRETER_TRANSLATE(sadd8)(inst, index);
 }
+ARM_INST_PTR INTERPRETER_TRANSLATE(ssub8)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(sadd8)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(ssub16)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(sadd8)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(ssubaddx)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(sadd8)(inst, index);
+}
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(sbc)(unsigned int inst, int index)
 {
     arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(sbc_inst));
@@ -2236,13 +2283,48 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(sel)(unsigned int inst, int index)
 
     return inst_base;
 }
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(setend)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("SETEND"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(shadd16)(unsigned int inst, int index)   { UNIMPLEMENTED_INSTRUCTION("SHADD16"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(shadd8)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("SHADD8"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(shaddsubx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SHADDSUBX"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(shsub16)(unsigned int inst, int index)   { UNIMPLEMENTED_INSTRUCTION("SHSUB16"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(shsub8)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("SHSUB8"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(shsubaddx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SHSUBADDX"); }
+
+ARM_INST_PTR INTERPRETER_TRANSLATE(shadd8)(unsigned int inst, int index)
+{
+    arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+
+    inst_base->cond     = BITS(inst, 28, 31);
+    inst_base->idx      = index;
+    inst_base->br       = NON_BRANCH;
+    inst_base->load_r15 = 0;
+
+    inst_cream->op1 = BITS(inst, 20, 21);
+    inst_cream->op2 = BITS(inst, 5, 7);
+    inst_cream->Rm  = BITS(inst, 0, 3);
+    inst_cream->Rn  = BITS(inst, 16, 19);
+    inst_cream->Rd  = BITS(inst, 12, 15);
+
+    return inst_base;
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(shadd16)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(shadd8)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(shaddsubx)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(shadd8)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(shsub8)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(shadd8)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(shsub16)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(shadd8)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(shsubaddx)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(shadd8)(inst, index);
+}
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(smla)(unsigned int inst, int index)
 {
     arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(smla_inst));
@@ -2262,25 +2344,40 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(smla)(unsigned int inst, int index)
 
     return inst_base;
 }
-ARM_INST_PTR INTERPRETER_TRANSLATE(smlad)(unsigned int inst, int index){
-    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(smlad_inst));
-    smlad_inst *inst_cream = (smlad_inst *)inst_base->component;
 
-    inst_base->cond  = BITS(inst, 28, 31);
-    inst_base->idx     = index;
-    inst_base->br     = NON_BRANCH;
+ARM_INST_PTR INTERPRETER_TRANSLATE(smlad)(unsigned int inst, int index)
+{
+    arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlad_inst));
+    smlad_inst* const inst_cream = (smlad_inst*)inst_base->component;
+
+    inst_base->cond     = BITS(inst, 28, 31);
+    inst_base->idx      = index;
+    inst_base->br       = NON_BRANCH;
     inst_base->load_r15 = 0;
 
-    inst_cream->m     = BIT(inst, 4);
-    inst_cream->Rn     = BITS(inst, 0, 3);
-    inst_cream->Rm     = BITS(inst, 8, 11);
-    inst_cream->Rd = BITS(inst, 16, 19);
-    inst_cream->Ra = BITS(inst, 12, 15);
+    inst_cream->m   = BIT(inst, 5);
+    inst_cream->Rn  = BITS(inst, 0, 3);
+    inst_cream->Rm  = BITS(inst, 8, 11);
+    inst_cream->Rd  = BITS(inst, 16, 19);
+    inst_cream->Ra  = BITS(inst, 12, 15);
+    inst_cream->op1 = BITS(inst, 20, 22);
+    inst_cream->op2 = BITS(inst, 5, 7);
 
-    if (CHECK_RM ) 
-        inst_base->load_r15 = 1;
     return inst_base;
 }
+ARM_INST_PTR INTERPRETER_TRANSLATE(smuad)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(smlad)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(smusd)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(smlad)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(smlsd)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(smlad)(inst, index);
+}
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(smlal)(unsigned int inst, int index)
 {
     arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(umlal_inst));
@@ -2301,15 +2398,82 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(smlal)(unsigned int inst, int index)
         inst_base->load_r15 = 1;
     return inst_base;
 }
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(smlalxy)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMLALXY"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(smlald)(unsigned int inst, int index)  { UNIMPLEMENTED_INSTRUCTION("SMLALD"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(smlaw)(unsigned int inst, int index)   { UNIMPLEMENTED_INSTRUCTION("SMLAW"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(smlsd)(unsigned int inst, int index)   { UNIMPLEMENTED_INSTRUCTION("SMLSD"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(smlsld)(unsigned int inst, int index)  { UNIMPLEMENTED_INSTRUCTION("SMLSLD"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(smmla)(unsigned int inst, int index)   { UNIMPLEMENTED_INSTRUCTION("SMMLA"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(smmls)(unsigned int inst, int index)   { UNIMPLEMENTED_INSTRUCTION("SMMLS"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(smmul)(unsigned int inst, int index)   { UNIMPLEMENTED_INSTRUCTION("SMMUL"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(smuad)(unsigned int inst, int index)   { UNIMPLEMENTED_INSTRUCTION("SMUAD"); }
+
+ARM_INST_PTR INTERPRETER_TRANSLATE(smlaw)(unsigned int inst, int index)
+{
+    arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlad_inst));
+    smlad_inst* const inst_cream = (smlad_inst*)inst_base->component;
+
+    inst_base->cond     = BITS(inst, 28, 31);
+    inst_base->idx      = index;
+    inst_base->br       = NON_BRANCH;
+    inst_base->load_r15 = 0;
+
+    inst_cream->Ra = BITS(inst, 12, 15);
+    inst_cream->Rm = BITS(inst, 8, 11);
+    inst_cream->Rn = BITS(inst, 0, 3);
+    inst_cream->Rd = BITS(inst, 16, 19);
+    inst_cream->m  = BIT(inst, 6);
+
+    return inst_base;
+}
+
+ARM_INST_PTR INTERPRETER_TRANSLATE(smlald)(unsigned int inst, int index)
+{
+    arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlald_inst));
+    smlald_inst* const inst_cream = (smlald_inst*)inst_base->component;
+
+    inst_base->cond     = BITS(inst, 28, 31);
+    inst_base->idx      = index;
+    inst_base->br       = NON_BRANCH;
+    inst_base->load_r15 = 0;
+
+    inst_cream->Rm   = BITS(inst, 8, 11);
+    inst_cream->Rn   = BITS(inst, 0, 3);
+    inst_cream->RdLo = BITS(inst, 12, 15);
+    inst_cream->RdHi = BITS(inst, 16, 19);
+    inst_cream->swap = BIT(inst, 5);
+    inst_cream->op1  = BITS(inst, 20, 22);
+    inst_cream->op2  = BITS(inst, 5, 7);
+
+    return inst_base;
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(smlsld)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(smlald)(inst, index);
+}
+
+ARM_INST_PTR INTERPRETER_TRANSLATE(smmla)(unsigned int inst, int index)
+{
+    arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlad_inst));
+    smlad_inst* const inst_cream = (smlad_inst*)inst_base->component;
+
+    inst_base->cond     = BITS(inst, 28, 31);
+    inst_base->idx      = index;
+    inst_base->br       = NON_BRANCH;
+    inst_base->load_r15 = 0;
+
+    inst_cream->m   = BIT(inst, 5);
+    inst_cream->Ra  = BITS(inst, 12, 15);
+    inst_cream->Rm  = BITS(inst, 8, 11);
+    inst_cream->Rn  = BITS(inst, 0, 3);
+    inst_cream->Rd  = BITS(inst, 16, 19);
+    inst_cream->op1 = BITS(inst, 20, 22);
+    inst_cream->op2 = BITS(inst, 5, 7);
+
+    return inst_base;
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(smmls)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(smmla)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(smmul)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(smmla)(inst, index);
+}
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(smul)(unsigned int inst, int index)
 {
     arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(smul_inst));
@@ -2372,7 +2536,6 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(smulw)(unsigned int inst, int index)
         inst_base->load_r15 = 1;
     return inst_base;
 }
-ARM_INST_PTR INTERPRETER_TRANSLATE(smusd)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("SMUSD"); }
 ARM_INST_PTR INTERPRETER_TRANSLATE(srs)(unsigned int inst, int index)      { UNIMPLEMENTED_INSTRUCTION("SRS"); }
 ARM_INST_PTR INTERPRETER_TRANSLATE(ssat)(unsigned int inst, int index)
 {
@@ -2408,15 +2571,7 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(ssat16)(unsigned int inst, int index)
 
     return inst_base;
 }
-ARM_INST_PTR INTERPRETER_TRANSLATE(ssub8)(unsigned int inst, int index)    { UNIMPLEMENTED_INSTRUCTION("SSUB8"); }
-ARM_INST_PTR INTERPRETER_TRANSLATE(ssub16)(unsigned int inst, int index)
-{
-    return INTERPRETER_TRANSLATE(sadd16)(inst, index);
-}
-ARM_INST_PTR INTERPRETER_TRANSLATE(ssubaddx)(unsigned int inst, int index)
-{
-    return INTERPRETER_TRANSLATE(sadd16)(inst, index);
-}
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(stc)(unsigned int inst, int index)
 {
     arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(stc_inst));
@@ -2566,37 +2721,30 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(strd)(unsigned int inst, int index){
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(strex)(unsigned int inst, int index)
 {
-    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
-    ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst));
+    generic_arm_inst *inst_cream = (generic_arm_inst *)inst_base->component;
 
     inst_base->cond = BITS(inst, 28, 31);
-    inst_base->idx     = index;
-    inst_base->br     = NON_BRANCH;
+    inst_base->idx  = index;
+    inst_base->br   = NON_BRANCH;
 
-    inst_cream->inst = inst;
-    inst_cream->get_addr = get_calc_addr_op(inst);
+    inst_cream->Rn  = BITS(inst, 16, 19);
+    inst_cream->Rd  = BITS(inst, 12, 15);
+    inst_cream->Rm  = BITS(inst, 0,   3);
 
-    if (BITS(inst, 12, 15) == 15) {
-        inst_base->br = INDIRECT_BRANCH;
-    }
     return inst_base;
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(strexb)(unsigned int inst, int index)
 {
-    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(ldst_inst));
-    ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
-
-    inst_base->cond = BITS(inst, 28, 31);
-    inst_base->idx     = index;
-    inst_base->br     = NON_BRANCH;
-
-    inst_cream->inst = inst;
-    inst_cream->get_addr = get_calc_addr_op(inst);
-
-    if (BITS(inst, 12, 15) == 15) {
-        inst_base->br = INDIRECT_BRANCH;
-    }
-    return inst_base;
+    return INTERPRETER_TRANSLATE(strex)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(strexh)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(strex)(inst, index);
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(strexd)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(strex)(inst, index);
 }
 ARM_INST_PTR INTERPRETER_TRANSLATE(strh)(unsigned int inst, int index)
 {
@@ -2723,7 +2871,29 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(sxtab)(unsigned int inst, int index){
 
     return inst_base;
 }
-ARM_INST_PTR INTERPRETER_TRANSLATE(sxtab16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SXTAB16"); }
+
+ARM_INST_PTR INTERPRETER_TRANSLATE(sxtab16)(unsigned int inst, int index)
+{
+    arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(sxtab_inst));
+    sxtab_inst* const inst_cream = (sxtab_inst*)inst_base->component;
+
+    inst_base->cond     = BITS(inst, 28, 31);
+    inst_base->idx      = index;
+    inst_base->br       = NON_BRANCH;
+    inst_base->load_r15 = 0;
+
+    inst_cream->Rm = BITS(inst, 0, 3);
+    inst_cream->Rn = BITS(inst, 16, 19);
+    inst_cream->Rd = BITS(inst, 12, 15);
+    inst_cream->rotate = BITS(inst, 10, 11);
+
+    return inst_base;
+}
+ARM_INST_PTR INTERPRETER_TRANSLATE(sxtb16)(unsigned int inst, int index)
+{
+    return INTERPRETER_TRANSLATE(sxtab16)(inst, index);
+}
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(sxtah)(unsigned int inst, int index){
     LOG_WARNING(Core_ARM11, "SXTAH untested");
     arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(sxtah_inst));
@@ -2741,7 +2911,7 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(sxtah)(unsigned int inst, int index){
 
     return inst_base;
 }
-ARM_INST_PTR INTERPRETER_TRANSLATE(sxtb16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SXTB16"); }
+
 ARM_INST_PTR INTERPRETER_TRANSLATE(teq)(unsigned int inst, int index)
 {
     arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(teq_inst));
@@ -3276,6 +3446,11 @@ const transop_fp_t arm_instruction_trans[] = {
     INTERPRETER_TRANSLATE(ldc),
     INTERPRETER_TRANSLATE(swi),
     INTERPRETER_TRANSLATE(bbl),
+    INTERPRETER_TRANSLATE(ldrexd),
+    INTERPRETER_TRANSLATE(strexd),
+    INTERPRETER_TRANSLATE(ldrexh),
+    INTERPRETER_TRANSLATE(strexh),
+
     // All the thumb instructions should be placed the end of table
     INTERPRETER_TRANSLATE(b_2_thumb), 
     INTERPRETER_TRANSLATE(b_cond_thumb), 
@@ -3314,7 +3489,7 @@ static tdstate decode_thumb_instr(arm_processor *cpu, uint32_t inst, addr_t addr
     tdstate ret = thumb_translate (addr, inst, arm_inst, inst_size);
     if(ret == t_branch){
         // TODO: FIXME, endian should be judged
-        uint32 tinstr;
+        u32 tinstr;
         if((addr & 0x3) != 0)
             tinstr = inst >> 16;
         else
@@ -3327,7 +3502,7 @@ static tdstate decode_thumb_instr(arm_processor *cpu, uint32_t inst, addr_t addr
         case 26:
         case 27:
             if (((tinstr & 0x0F00) != 0x0E00) && ((tinstr & 0x0F00) != 0x0F00)){
-                uint32 cond = (tinstr & 0x0F00) >> 8;
+                u32 cond = (tinstr & 0x0F00) >> 8;
                 inst_index = table_length - 4;
                 *ptr_inst_base = arm_instruction_trans[inst_index](tinstr, inst_index);
             } else {
@@ -3364,8 +3539,6 @@ static tdstate decode_thumb_instr(arm_processor *cpu, uint32_t inst, addr_t addr
     return ret;
 }
 
-unsigned int *InstLength;
-
 enum {
     KEEP_GOING,
     FETCH_EXCEPTION
@@ -3450,28 +3623,6 @@ translated:
 
 #define LOG_IN_CLR    skyeye_printf_in_color
 
-int cmp(const void *x, const void *y) {
-    return *(unsigned long long int*)x - *(unsigned long long int *)y;
-}
-
-void InterpreterInitInstLength(unsigned long long int *ptr, size_t size) {
-    int array_size = size / sizeof(void *);
-    unsigned long long int *InstLabel = new unsigned long long int[array_size];
-    memcpy(InstLabel, ptr, size);
-    qsort(InstLabel, array_size, sizeof(void *), cmp);
-    InstLength = new unsigned int[array_size - 4];
-    for (int i = 0; i < array_size - 4; i++) {
-        for (int j = 0; j < array_size; j++) {
-            if (ptr[i] == InstLabel[j]) {
-                InstLength[i] = InstLabel[j + 1] - InstLabel[j];
-                break;
-            }
-        }
-    }
-    for (int i = 0; i < array_size - 4; i++)
-        LOG_DEBUG(Core_ARM11, "[%d]:%d", i, InstLength[i]);
-}
-
 int clz(unsigned int x) {
     int n;
     if (x == 0) return (32);
@@ -3496,6 +3647,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
     #define CRm             inst_cream->crm
     #define CP15_REG(n)     cpu->CP15[CP15(n)]
     #define RD              cpu->Reg[inst_cream->Rd]
+    #define RD2             cpu->Reg[inst_cream->Rd + 1]
     #define RN              cpu->Reg[inst_cream->Rn]
     #define RM              cpu->Reg[inst_cream->Rm]
     #define RS              cpu->Reg[inst_cream->Rs]
@@ -3707,14 +3859,18 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
     case 182: goto LDC_INST; \
     case 183: goto SWI_INST; \
     case 184: goto BBL_INST; \
-    case 185: goto B_2_THUMB ; \
-    case 186: goto B_COND_THUMB ; \
-    case 187: goto BL_1_THUMB ; \
-    case 188: goto BL_2_THUMB ; \
-    case 189: goto BLX_1_THUMB ; \
-    case 190: goto DISPATCH; \
-    case 191: goto INIT_INST_LENGTH; \
-    case 192: goto END; \
+    case 185: goto LDREXD_INST; \
+    case 186: goto STREXD_INST; \
+    case 187: goto LDREXH_INST; \
+    case 188: goto STREXH_INST; \
+    case 189: goto B_2_THUMB ; \
+    case 190: goto B_COND_THUMB ; \
+    case 191: goto BL_1_THUMB ; \
+    case 192: goto BL_2_THUMB ; \
+    case 193: goto BLX_1_THUMB ; \
+    case 194: goto DISPATCH; \
+    case 195: goto INIT_INST_LENGTH; \
+    case 196: goto END; \
     }
 #endif
 
@@ -3775,8 +3931,9 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
         &&MLA_INST,&&SSAT_INST,&&USAT_INST,&&MRS_INST,&&MSR_INST,&&AND_INST,&&BIC_INST,&&LDM_INST,&&EOR_INST,&&ADD_INST,&&RSB_INST,&&RSC_INST,
         &&SBC_INST,&&ADC_INST,&&SUB_INST,&&ORR_INST,&&MVN_INST,&&MOV_INST,&&STM_INST,&&LDM_INST,&&LDRSH_INST,&&STM_INST,&&LDM_INST,&&LDRSB_INST,
         &&STRD_INST,&&LDRH_INST,&&STRH_INST,&&LDRD_INST,&&STRT_INST,&&STRBT_INST,&&LDRBT_INST,&&LDRT_INST,&&MRC_INST,&&MCR_INST,&&MSR_INST,
-        &&LDRB_INST,&&STRB_INST,&&LDR_INST,&&LDRCOND_INST, &&STR_INST,&&CDP_INST,&&STC_INST,&&LDC_INST,&&SWI_INST,&&BBL_INST,&&B_2_THUMB, &&B_COND_THUMB, 
-        &&BL_1_THUMB, &&BL_2_THUMB, &&BLX_1_THUMB, &&DISPATCH,&&INIT_INST_LENGTH,&&END
+        &&LDRB_INST,&&STRB_INST,&&LDR_INST,&&LDRCOND_INST, &&STR_INST,&&CDP_INST,&&STC_INST,&&LDC_INST,&&SWI_INST,&&BBL_INST,&&LDREXD_INST,
+        &&STREXD_INST,&&LDREXH_INST,&&STREXH_INST,&&B_2_THUMB, &&B_COND_THUMB,&&BL_1_THUMB, &&BL_2_THUMB, &&BLX_1_THUMB, &&DISPATCH,
+        &&INIT_INST_LENGTH,&&END
         };
 #endif
     arm_inst * inst_base;
@@ -3976,22 +4133,35 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
         INC_PC(sizeof(blx_inst));
         goto DISPATCH;
     }
+
     BX_INST:
+    BXJ_INST:
     {
-        bx_inst *inst_cream = (bx_inst *)inst_base->component;
-        if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        // Note that only the 'fail' case of BXJ is emulated, because
+        // the facilities for Jazelle emulation are not implemented.
+        //
+        // According to the ARM documentation on BXJ, if setting the J bit in the APSR
+        // fails, then BXJ behaves identically to a regular BX instruction.
+        //
+        // This is sufficient for Citra, as the CPU for the 3DS does not implement Jazelle.
+
+        if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
+            bx_inst* const inst_cream = (bx_inst*)inst_base->component;
+
             if (inst_cream->Rm == 15)
                 LOG_WARNING(Core_ARM11, "BX at pc %x: use of Rm = R15 is discouraged", cpu->Reg[15]);
+
             cpu->TFlag = cpu->Reg[inst_cream->Rm] & 0x1;
             cpu->Reg[15] = cpu->Reg[inst_cream->Rm] & 0xfffffffe;
             INC_PC(sizeof(bx_inst));
             goto DISPATCH;
         }
+
         cpu->Reg[15] += GET_INST_SIZE(cpu);
         INC_PC(sizeof(bx_inst));
         goto DISPATCH;
     }
-    BXJ_INST:
+
     CDP_INST:
     {
         cdp_inst *inst_cream = (cdp_inst *)inst_base->component;
@@ -4377,45 +4547,84 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
 
     LDREX_INST:
     {
-        ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
+        generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component; // operands are read from the pre-decoded Rn/Rd fields
         if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-            addr = cpu->Reg[BITS(inst_cream->inst, 16, 19)];
+            unsigned int read_addr = RN; // load address is the value held in Rn
 
-            unsigned int value = Memory::Read32(addr);
-
-            add_exclusive_addr(cpu, read_addr);
+            add_exclusive_addr(cpu, read_addr); // presumably what the later exclusive_detect() in STREX checks against -- confirm in helper
             cpu->exclusive_state = 1;
 
-            cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value;
-            if (BITS(inst_cream->inst, 12, 15) == 15) {
-                INC_PC(sizeof(ldst_inst));
+            RD = Memory::Read32(read_addr); // 32-bit load into Rd
+            if (inst_cream->Rd == 15) { // Rd is r15: the load already replaced the PC, so skip the PC increment below
+                INC_PC(sizeof(generic_arm_inst));
                 goto DISPATCH;
             }
         }
         cpu->Reg[15] += GET_INST_SIZE(cpu);
-        INC_PC(sizeof(ldst_inst));
+        INC_PC(sizeof(generic_arm_inst));
         FETCH_INST;
         GOTO_NEXT_INST;
     }
     LDREXB_INST:
     {
-        ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
+        generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component; // operands are read from the pre-decoded Rn/Rd fields
         if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-            addr = cpu->Reg[BITS(inst_cream->inst, 16, 19)];
+            unsigned int read_addr = RN; // load address is the value held in Rn
 
-            unsigned int value = Memory::Read8(addr);
+            add_exclusive_addr(cpu, read_addr); // presumably what the later exclusive_detect() in the store-exclusive handlers checks -- confirm in helper
+            cpu->exclusive_state = 1;
+
+            RD = Memory::Read8(read_addr); // byte load into Rd
+            if (inst_cream->Rd == 15) { // Rd is r15: the load already replaced the PC, so skip the PC increment below
+                INC_PC(sizeof(generic_arm_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += GET_INST_SIZE(cpu); // normal path: advance the PC by one instruction
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDREXH_INST: // halfword variant of the LDREX handler
+    {
+        generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+        if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+            unsigned int read_addr = RN; // load address is the value held in Rn
 
-            add_exclusive_addr(cpu, addr);
+            add_exclusive_addr(cpu, read_addr); // presumably what the later exclusive_detect() in the store-exclusive handlers checks -- confirm in helper
             cpu->exclusive_state = 1;
 
-            cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value;
-            if (BITS(inst_cream->inst, 12, 15) == 15) {
-                INC_PC(sizeof(ldst_inst));
+            RD = Memory::Read16(read_addr); // 16-bit load into Rd
+            if (inst_cream->Rd == 15) { // Rd is r15: the load already replaced the PC, so skip the PC increment below
+                INC_PC(sizeof(generic_arm_inst));
                 goto DISPATCH;
             }
         }
         cpu->Reg[15] += GET_INST_SIZE(cpu);
-        INC_PC(sizeof(ldst_inst));
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    LDREXD_INST: // doubleword variant of the LDREX handler: fills Rd and the register after it
+    {
+        generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+        if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+            unsigned int read_addr = RN; // load address is the value held in Rn
+
+            add_exclusive_addr(cpu, read_addr); // only the first word's address is registered (see TODO below)
+            cpu->exclusive_state = 1;
+            // TODO(bunnei): Do we need to also make [read_addr + 4] exclusive?
+
+            RD = Memory::Read32(read_addr);      // first word -> Rd
+            RD2 = Memory::Read32(read_addr + 4); // second word -> register Rd + 1 (RD2 macro)
+
+            if (inst_cream->Rd == 15) { // Rd is r15: the load already replaced the PC, so skip the PC increment below
+                INC_PC(sizeof(generic_arm_inst));
+                goto DISPATCH;
+            }
+        }
+        cpu->Reg[15] += GET_INST_SIZE(cpu); // normal path: advance the PC by one instruction
+        INC_PC(sizeof(generic_arm_inst));
         FETCH_INST;
         GOTO_NEXT_INST;
     }
@@ -4877,6 +5086,78 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
     }
 
     QADD_INST:
+    QDADD_INST:
+    QDSUB_INST:
+    QSUB_INST:
+    { // Shared handler for the four saturating add/subtract forms; op1 selects the variant.
+        if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+            const u8 op1 = inst_cream->op1;
+            const u32 rm_val = RM; // operands are latched up front, so writing Rd cannot disturb them
+            const u32 rn_val = RN;
+
+            u32 result = 0;
+
+            // QADD: Rd = saturate(Rm + Rn)
+            if (op1 == 0x00) {
+                result = rm_val + rn_val;
+
+                if (AddOverflow(rm_val, rn_val, result)) {
+                    result = POS(result) ? 0x80000000 : 0x7FFFFFFF; // wrapped sum has the wrong sign: clamp towards the true one
+                    cpu->Cpsr |= (1 << 27); // record the saturation in CPSR bit 27 (Q flag)
+                }
+            }
+            // QSUB: Rd = saturate(Rm - Rn)
+            else if (op1 == 0x01) {
+                result = rm_val - rn_val;
+
+                if (SubOverflow(rm_val, rn_val, result)) {
+                    result = POS(result) ? 0x80000000 : 0x7FFFFFFF;
+                    cpu->Cpsr |= (1 << 27);
+                }
+            }
+            // QDADD: Rd = saturate(Rm + saturate(2 * Rn))
+            else if (op1 == 0x02) {
+                u32 mul = (rn_val * 2);
+
+                if (AddOverflow(rn_val, rn_val, mul)) { // doubling is checked as Rn + Rn, same form as the QDSUB branch
+                    mul = POS(mul) ? 0x80000000 : 0x7FFFFFFF;
+                    cpu->Cpsr |= (1 << 27);
+                }
+
+                result = mul + rm_val;
+
+                if (AddOverflow(rm_val, mul, result)) {
+                    result = POS(result) ? 0x80000000 : 0x7FFFFFFF;
+                    cpu->Cpsr |= (1 << 27);
+                }
+            }
+            // QDSUB: Rd = saturate(Rm - saturate(2 * Rn))
+            else if (op1 == 0x03) {
+                u32 mul = (rn_val * 2);
+
+                if (AddOverflow(rn_val, rn_val, mul)) {
+                    mul = POS(mul) ? 0x80000000 : 0x7FFFFFFF;
+                    cpu->Cpsr |= (1 << 27);
+                }
+
+                result = rm_val - mul;
+
+                if (SubOverflow(rm_val, mul, result)) {
+                    result = POS(result) ? 0x80000000 : 0x7FFFFFFF;
+                    cpu->Cpsr |= (1 << 27);
+                }
+            }
+
+            RD = result;
+        }
+
+        cpu->Reg[15] += GET_INST_SIZE(cpu);
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
     QADD8_INST:
     QADD16_INST:
     QADDSUBX_INST:
@@ -4939,42 +5220,39 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
         GOTO_NEXT_INST;
     }
 
-    QDADD_INST:
-    QDSUB_INST:
-    QSUB_INST:
     REV_INST:
-    {
-        rev_inst *inst_cream = (rev_inst *)inst_base->component;
-        if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-            RD = ((RM & 0xff) << 24) |
-                (((RM >> 8) & 0xff) << 16) |
-                (((RM >> 16) & 0xff) << 8) |
-                ((RM >> 24) & 0xff);
-            if (inst_cream->Rm == 15) {
-                LOG_ERROR(Core_ARM11, "invalid operand for REV");
-                CITRA_IGNORE_EXIT(-1);
-            }
-        }
-        cpu->Reg[15] += GET_INST_SIZE(cpu);
-        INC_PC(sizeof(rev_inst));
-        FETCH_INST;
-        GOTO_NEXT_INST;
-    }
     REV16_INST:
+    REVSH_INST:
     {
-        rev_inst *inst_cream = (rev_inst *)inst_base->component;
-        if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-            RD = (BITS(RM, 0, 7) << 8) | 
-                BITS(RM, 8, 15) |
-                (BITS(RM, 16, 23) << 24) |
-                (BITS(RM, 24, 31) << 16);
+        // One handler for REV, REV16 and REVSH; the decoded op1/op2 fields pick the variant.
+        if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
+            rev_inst* const inst_cream = (rev_inst*)inst_base->component;
+
+            const u8 op1 = inst_cream->op1;
+            const u8 op2 = inst_cream->op2;
+
+            // REV: reverse the order of all four bytes of Rm
+            if (op1 == 0x03 && op2 == 0x01) {
+                RD = ((RM & 0xFF) << 24) | (((RM >> 8) & 0xFF) << 16) | (((RM >> 16) & 0xFF) << 8) | ((RM >> 24) & 0xFF);
+            }
+            // REV16: swap the two bytes inside each halfword of Rm
+            else if (op1 == 0x03 && op2 == 0x05) {
+                RD = ((RM & 0xFF) << 8) | ((RM & 0xFF00) >> 8) | ((RM & 0xFF0000) << 8) | ((RM & 0xFF000000) >> 8);
+            }
+            // REVSH: swap the bytes of the low halfword of Rm, then sign-extend from bit 15
+            else if (op1 == 0x07 && op2 == 0x05) {
+                RD = ((RM & 0xFF) << 8) | ((RM & 0xFF00) >> 8);
+                if (RD & 0x8000)
+                    RD |= 0xffff0000;
+            }
         }
+
         cpu->Reg[15] += GET_INST_SIZE(cpu);
         INC_PC(sizeof(rev_inst));
         FETCH_INST;
         GOTO_NEXT_INST;
     }
-    REVSH_INST:
+
     RFE_INST:
     RSB_INST:
     {
@@ -5039,6 +5317,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
     }
 
     SADD8_INST:
+    SSUB8_INST:
     SADD16_INST:
     SADDSUBX_INST:
     SSUBADDX_INST:
@@ -5046,52 +5325,96 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
     {
         if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
             generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+            const u8 op2 = inst_cream->op2;
 
-            const s16 rn_lo = (RN & 0xFFFF);
-            const s16 rn_hi = ((RN >> 16) & 0xFFFF);
-            const s16 rm_lo = (RM & 0xFFFF);
-            const s16 rm_hi = ((RM >> 16) & 0xFFFF);
+            if (op2 == 0x00 || op2 == 0x01 || op2 == 0x02 || op2 == 0x03) {
+                const s16 rn_lo = (RN & 0xFFFF);
+                const s16 rn_hi = ((RN >> 16) & 0xFFFF);
+                const s16 rm_lo = (RM & 0xFFFF);
+                const s16 rm_hi = ((RM >> 16) & 0xFFFF);
 
-            s32 lo_result = 0;
-            s32 hi_result = 0;
+                s32 lo_result = 0;
+                s32 hi_result = 0;
 
-            // SADD16
-            if (inst_cream->op2 == 0x00) {
-                lo_result = (rn_lo + rm_lo);
-                hi_result = (rn_hi + rm_hi);
-            }
-            // SASX
-            else if (inst_cream->op2 == 0x01) {
-                lo_result = (rn_lo - rm_hi);
-                hi_result = (rn_hi + rm_lo);
-            }
-            // SSAX
-            else if (inst_cream->op2 == 0x02) {
-                lo_result = (rn_lo + rm_hi);
-                hi_result = (rn_hi - rm_lo);
-            }
-            // SSUB16
-            else if (inst_cream->op2 == 0x03) {
-                lo_result = (rn_lo - rm_lo);
-                hi_result = (rn_hi - rm_hi);
-            }
+                // SADD16
+                if (inst_cream->op2 == 0x00) {
+                    lo_result = (rn_lo + rm_lo);
+                    hi_result = (rn_hi + rm_hi);
+                }
+                // SASX
+                else if (op2 == 0x01) {
+                    lo_result = (rn_lo - rm_hi);
+                    hi_result = (rn_hi + rm_lo);
+                }
+                // SSAX
+                else if (op2 == 0x02) {
+                    lo_result = (rn_lo + rm_hi);
+                    hi_result = (rn_hi - rm_lo);
+                }
+                // SSUB16
+                else if (op2 == 0x03) {
+                    lo_result = (rn_lo - rm_lo);
+                    hi_result = (rn_hi - rm_hi);
+                }
 
-            RD = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16);
+                RD = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16);
 
-            if (lo_result >= 0) {
-                cpu->Cpsr |= (1 << 16);
-                cpu->Cpsr |= (1 << 17);
-            } else {
-                cpu->Cpsr &= ~(1 << 16);
-                cpu->Cpsr &= ~(1 << 17);
+                if (lo_result >= 0) {
+                    cpu->Cpsr |= (1 << 16);
+                    cpu->Cpsr |= (1 << 17);
+                } else {
+                    cpu->Cpsr &= ~(1 << 16);
+                    cpu->Cpsr &= ~(1 << 17);
+                }
+
+                if (hi_result >= 0) {
+                    cpu->Cpsr |= (1 << 18);
+                    cpu->Cpsr |= (1 << 19);
+                } else {
+                    cpu->Cpsr &= ~(1 << 18);
+                    cpu->Cpsr &= ~(1 << 19);
+                }
             }
+            else if (op2 == 0x04 || op2 == 0x07) {
+                s32 lo_val1, lo_val2;
+                s32 hi_val1, hi_val2;
 
-            if (hi_result >= 0) {
-                cpu->Cpsr |= (1 << 18);
-                cpu->Cpsr |= (1 << 19);
-            } else {
-                cpu->Cpsr &= ~(1 << 18);
-                cpu->Cpsr &= ~(1 << 19);
+                // SADD8
+                if (op2 == 0x04) {
+                    lo_val1 = (s32)(s8)(RN & 0xFF) + (s32)(s8)(RM & 0xFF);
+                    lo_val2 = (s32)(s8)((RN >> 8) & 0xFF)  + (s32)(s8)((RM >> 8) & 0xFF);
+                    hi_val1 = (s32)(s8)((RN >> 16) & 0xFF) + (s32)(s8)((RM >> 16) & 0xFF);
+                    hi_val2 = (s32)(s8)((RN >> 24) & 0xFF) + (s32)(s8)((RM >> 24) & 0xFF);
+                }
+                // SSUB8
+                else {
+                    lo_val1 = (s32)(s8)(RN & 0xFF) - (s32)(s8)(RM & 0xFF);
+                    lo_val2 = (s32)(s8)((RN >> 8) & 0xFF) - (s32)(s8)((RM >> 8) & 0xFF);
+                    hi_val1 = (s32)(s8)((RN >> 16) & 0xFF) - (s32)(s8)((RM >> 16) & 0xFF);
+                    hi_val2 = (s32)(s8)((RN >> 24) & 0xFF) - (s32)(s8)((RM >> 24) & 0xFF);
+                }
+
+                RD =  ((lo_val1 & 0xFF) | ((lo_val2 & 0xFF) << 8) | ((hi_val1 & 0xFF) << 16) | ((hi_val2 & 0xFF) << 24));
+
+                if (lo_val1 >= 0)
+                    cpu->Cpsr |= (1 << 16);
+                else
+                    cpu->Cpsr &= ~(1 << 16);
+
+                if (lo_val2 >= 0)
+                    cpu->Cpsr |= (1 << 17);
+                else
+                    cpu->Cpsr &= ~(1 << 17);
+
+                if (hi_val1 >= 0)
+                    cpu->Cpsr |= (1 << 18);
+                else
+                    cpu->Cpsr &= ~(1 << 18);
+
+                if (hi_val2 >= 0)
+                    cpu->Cpsr |= (1 << 19);
+                else
+                    cpu->Cpsr &= ~(1 << 19);
             }
         }
 
@@ -5176,12 +5499,79 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
     }
 
     SETEND_INST:
-    SHADD16_INST:
+
     SHADD8_INST:
+    SHADD16_INST:
     SHADDSUBX_INST:
-    SHSUB16_INST:
     SHSUB8_INST:
+    SHSUB16_INST:
     SHSUBADDX_INST:
+    { // Shared handler for the signed halving add/subtract family; op2 selects the variant.
+        if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
+            generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component;
+
+            const u8 op2 = inst_cream->op2;
+            const u32 rm_val = RM; // operands are latched up front, so writing Rd cannot disturb them
+            const u32 rn_val = RN;
+
+            if (op2 == 0x00 || op2 == 0x01 || op2 == 0x02 || op2 == 0x03) { // halfword forms
+                s32 lo_result = 0;
+                s32 hi_result = 0;
+
+                // SHADD16
+                if (op2 == 0x00) {
+                    lo_result = ((s16)(rn_val & 0xFFFF) + (s16)(rm_val & 0xFFFF)) >> 1; // NOTE(review): relies on >> of a negative int being an arithmetic shift
+                    hi_result = ((s16)((rn_val >> 16) & 0xFFFF) + (s16)((rm_val >> 16) & 0xFFFF)) >> 1;
+                }
+                // SHASX
+                else if (op2 == 0x01) {
+                    lo_result = ((s16)(rn_val & 0xFFFF) - (s16)((rm_val >> 16) & 0xFFFF)) >> 1;
+                    hi_result = ((s16)((rn_val >> 16) & 0xFFFF) + (s16)(rm_val & 0xFFFF)) >> 1;
+                }
+                // SHSAX
+                else if (op2 == 0x02) {
+                    lo_result = ((s16)(rn_val & 0xFFFF) + (s16)((rm_val >> 16) & 0xFFFF)) >> 1;
+                    hi_result = ((s16)((rn_val >> 16) & 0xFFFF) - (s16)(rm_val & 0xFFFF)) >> 1;
+                }
+                // SHSUB16
+                else if (op2 == 0x03) {
+                    lo_result = ((s16)(rn_val & 0xFFFF) - (s16)(rm_val & 0xFFFF)) >> 1;
+                    hi_result = ((s16)((rn_val >> 16) & 0xFFFF) - (s16)((rm_val >> 16) & 0xFFFF)) >> 1;
+                }
+
+                RD = ((lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16)); // repack the two halved halfwords
+            }
+            else if (op2 == 0x04 || op2 == 0x07) { // byte forms
+                s16 lo_val1, lo_val2;
+                s16 hi_val1, hi_val2;
+
+                // SHADD8
+                if (op2 == 0x04) {
+                    lo_val1 = ((s8)(rn_val & 0xFF) + (s8)(rm_val & 0xFF)) >> 1;
+                    lo_val2 = ((s8)((rn_val >> 8) & 0xFF) + (s8)((rm_val >> 8) & 0xFF)) >> 1;
+
+                    hi_val1 = ((s8)((rn_val >> 16) & 0xFF) + (s8)((rm_val >> 16) & 0xFF)) >> 1;
+                    hi_val2 = ((s8)((rn_val >> 24) & 0xFF) + (s8)((rm_val >> 24) & 0xFF)) >> 1;
+                }
+                // SHSUB8
+                else {
+                    lo_val1 = ((s8)(rn_val & 0xFF) - (s8)(rm_val & 0xFF)) >> 1;
+                    lo_val2 = ((s8)((rn_val >> 8) & 0xFF) - (s8)((rm_val >> 8) & 0xFF)) >> 1;
+
+                    hi_val1 = ((s8)((rn_val >> 16) & 0xFF) - (s8)((rm_val >> 16) & 0xFF)) >> 1;
+                    hi_val2 = ((s8)((rn_val >> 24) & 0xFF) - (s8)((rm_val >> 24) & 0xFF)) >> 1;
+                }
+
+                RD = (lo_val1 & 0xFF) | ((lo_val2 & 0xFF) << 8) | ((hi_val1 & 0xFF) << 16) | ((hi_val2 & 0xFF) << 24); // repack the four halved bytes
+            }
+        }
+
+        cpu->Reg[15] += GET_INST_SIZE(cpu);
+        INC_PC(sizeof(generic_arm_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
     SMLA_INST:
     {
         if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
@@ -5198,51 +5588,67 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
                 operand2 = (BIT(RS, 31)) ? (BITS(RS, 16, 31) | 0xffff0000) : BITS(RS, 16, 31);
             RD = operand1 * operand2 + RN;
 
-            // TODO: FIXME: UPDATE Q FLAGS
+            if (AddOverflow(operand1 * operand2, RN, RD))
+                cpu->Cpsr |= (1 << 27);
         }
         cpu->Reg[15] += GET_INST_SIZE(cpu);
         INC_PC(sizeof(smla_inst));
         FETCH_INST;
         GOTO_NEXT_INST;
     }
+
     SMLAD_INST:
+    SMLSD_INST:
+    SMUAD_INST:
+    SMUSD_INST:
     {
-        if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-            smlad_inst *inst_cream = (smlad_inst *)inst_base->component;
-            long long int rm = cpu->Reg[inst_cream->Rm];
-            long long int rn = cpu->Reg[inst_cream->Rn];
-            long long int ra = cpu->Reg[inst_cream->Ra];
+        if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { // shared handler for the dual 16-bit multiply forms
+            smlad_inst* const inst_cream = (smlad_inst*)inst_base->component;
+            const u8 op2 = inst_cream->op2; // bit 1 selects the subtracting forms (SMUSD/SMLSD)
 
-            // See SMUAD
-            if(inst_cream->Ra == 15)
-                CITRA_IGNORE_EXIT(-1);
-            int operand2 = (inst_cream->m)? ROTATE_RIGHT_32(rm, 16):rm;
-            int half_rn, half_operand2;
+            u32 rm_val = cpu->Reg[inst_cream->Rm];
+            const u32 rn_val = cpu->Reg[inst_cream->Rn];
 
-            half_rn = rn & 0xFFFF;
-            half_rn = (half_rn & 0x8000)? (0xFFFF0000|half_rn) : half_rn;
+            if (inst_cream->m) // m set: swap the two halfwords of Rm before multiplying
+                rm_val = (((rm_val & 0xFFFF) << 16) | (rm_val >> 16));
 
-            half_operand2 = operand2 & 0xFFFF;
-            half_operand2 = (half_operand2 & 0x8000)? (0xFFFF0000|half_operand2) : half_operand2;
+            const s16 rm_lo = (rm_val & 0xFFFF);
+            const s16 rm_hi = ((rm_val >> 16) & 0xFFFF);
+            const s16 rn_lo = (rn_val & 0xFFFF);
+            const s16 rn_hi = ((rn_val >> 16) & 0xFFFF);
 
-            long long int product1 = half_rn * half_operand2;
+            const u32 product1 = (rn_lo * rm_lo); // signed 16x16 products, kept as raw 32-bit patterns
+            const u32 product2 = (rn_hi * rm_hi);
 
-            half_rn = (rn & 0xFFFF0000) >> 16;
-            half_rn = (half_rn & 0x8000)? (0xFFFF0000|half_rn) : half_rn;
+            // SMUAD and SMLAD
+            if (BIT(op2, 1) == 0) {
+                const bool accumulate = (inst_cream->Ra != 15); // Ra == 15 means there is no accumulator (SMUAD)
 
-            half_operand2 = (operand2 & 0xFFFF0000) >> 16;
-            half_operand2 = (half_operand2 & 0x8000)? (0xFFFF0000|half_operand2) : half_operand2;
+                // Latch Ra before Rd is written: Rd and Ra may name the same register.
+                const u32 ra_val = accumulate ? cpu->Reg[inst_cream->Ra] : 0;
 
-            long long int product2 = half_rn * half_operand2;
+                RD = (product1 + product2) + ra_val;
+                if (accumulate && ARMul_AddOverflowQ(product1 + product2, ra_val))
+                    cpu->Cpsr |= (1 << 27); // accumulate overflowed: set CPSR bit 27 (Q flag)
 
-            long long int signed_ra = (ra & 0x80000000)? (0xFFFFFFFF00000000LL) | ra : ra;
-            long long int result = product1 + product2 + signed_ra;
-            cpu->Reg[inst_cream->Rd] = result & 0xFFFFFFFF;
+                if (ARMul_AddOverflowQ(product1, product2))
+                    cpu->Cpsr |= (1 << 27); // adding the two products overflowed
+            }
+            // SMUSD and SMLSD
+            else {
+                const bool accumulate = (inst_cream->Ra != 15); // Ra == 15 means there is no accumulator (SMUSD)
+
+                // Same aliasing concern as above: latch Ra before Rd is written.
+                const u32 ra_val = accumulate ? cpu->Reg[inst_cream->Ra] : 0;
 
-            // TODO: FIXME should check Signed overflow
+                RD = (product1 - product2) + ra_val;
+                if (accumulate && ARMul_AddOverflowQ(product1 - product2, ra_val))
+                    cpu->Cpsr |= (1 << 27); // accumulate overflowed: set CPSR bit 27 (Q flag)
+            }
         }
+
         cpu->Reg[15] += GET_INST_SIZE(cpu);
-        INC_PC(sizeof(umlal_inst));
+        INC_PC(sizeof(smlad_inst));
         FETCH_INST;
         GOTO_NEXT_INST;
     }
@@ -5275,15 +5681,108 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
         FETCH_INST;
         GOTO_NEXT_INST;
     }
+
     SMLALXY_INST:
-    SMLALD_INST:
+    // SMLALXY_INST (label above) has no body of its own and falls through into SMLAW_INST.
     SMLAW_INST:
-    SMLSD_INST:
+    {
+        if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
+            smlad_inst* const inst_cream = (smlad_inst*)inst_base->component;
+
+            const u32 rm_val = RM;
+            const u32 rn_val = RN;
+            const u32 ra_val = cpu->Reg[inst_cream->Ra];
+            const bool high = (inst_cream->m == 1); // m picks which halfword of Rm is the 16-bit multiplier
+
+            const s16 operand2 = (high) ? ((rm_val >> 16) & 0xFFFF) : (rm_val & 0xFFFF);
+            const s64 result = (s64)(s32)rn_val * (s64)(s32)operand2 + ((s64)(s32)ra_val << 16); // 32x16 signed product plus Ra aligned 16 bits up
+
+            RD = (result & (0xFFFFFFFFFFFFFFFFLL >> 15)) >> 16; // mask keeps the low 49 bits, then the low 16 are dropped
+
+            if ((result >> 16) != (s32)RD) // full shifted value differs from what Rd holds: flag it in CPSR bit 27 (Q)
+                cpu->Cpsr |= (1 << 27);
+        }
+
+        cpu->Reg[15] += GET_INST_SIZE(cpu);
+        INC_PC(sizeof(smlad_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
+    SMLALD_INST:
     SMLSLD_INST:
+    { // Shared handler: dual 16-bit multiply, add/subtract the products, accumulate into the 64-bit RdHi:RdLo pair.
+        if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
+            smlald_inst* const inst_cream = (smlald_inst*)inst_base->component;
+
+            const bool do_swap = (inst_cream->swap == 1);
+            const u32 rdlo_val = RDLO; // all inputs are latched before RDLO/RDHI are overwritten
+            const u32 rdhi_val = RDHI;
+            const u32 rn_val   = RN;
+            u32 rm_val         = RM;
+
+            if (do_swap) // swap the two halfwords of Rm before multiplying
+                rm_val = (((rm_val & 0xFFFF) << 16) | (rm_val >> 16));
+            // Each s16 x s16 product fits in s32, but the sum of two of them may not.
+            const s32 product1 = (s16)(rn_val & 0xFFFF) * (s16)(rm_val & 0xFFFF);
+            const s32 product2 = (s16)((rn_val >> 16) & 0xFFFF) * (s16)((rm_val >> 16) & 0xFFFF);
+            s64 result;
+
+            // SMLALD
+            if (BIT(inst_cream->op2, 1) == 0) {
+                result = ((s64)product1 + product2) + (s64)(rdlo_val | ((s64)rdhi_val << 32)); // widen first: 0x8000*0x8000 twice is 2^31, which overflows s32
+            }
+            // SMLSLD
+            else {
+                result = ((s64)product1 - product2) + (s64)(rdlo_val | ((s64)rdhi_val << 32)); // widened the same way for symmetry
+            }
+
+            RDLO = (result & 0xFFFFFFFF);
+            RDHI = ((result >> 32) & 0xFFFFFFFF);
+        }
+
+        cpu->Reg[15] += GET_INST_SIZE(cpu);
+        INC_PC(sizeof(smlald_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
     SMMLA_INST:
     SMMLS_INST:
     SMMUL_INST:
-    SMUAD_INST:
+    { // Shared handler: 32x32 signed multiply that keeps only the upper 32 bits of the 64-bit result.
+        if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
+            smlad_inst* const inst_cream = (smlad_inst*)inst_base->component;
+
+            const u32 rm_val = RM;
+            const u32 rn_val = RN;
+            const bool do_round = (inst_cream->m == 1); // m requests rounding of the discarded low word
+
+            // Assume SMMUL by default.
+            s64 result = (s64)(s32)rn_val * (s64)(s32)rm_val;
+
+            if (inst_cream->Ra != 15) { // Ra == 15 means there is no accumulator (plain SMMUL)
+                const u32 ra_val = cpu->Reg[inst_cream->Ra];
+
+                // SMMLA, otherwise SMMLS
+                if (BIT(inst_cream->op2, 1) == 0)
+                    result += ((s64)ra_val << 32); // Ra is aligned with the upper word of the product
+                else
+                    result = ((s64)ra_val << 32) - result;
+            }
+
+            if (do_round)
+                result += 0x80000000; // add half of the low word so the truncation below rounds
+
+            RD = ((result >> 32) & 0xFFFFFFFF); // upper 32 bits only
+        }
+
+        cpu->Reg[15] += GET_INST_SIZE(cpu);
+        INC_PC(sizeof(smlad_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
     SMUL_INST:
     {
         if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
@@ -5351,8 +5850,8 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
         GOTO_NEXT_INST;
     }
 
-    SMUSD_INST:
     SRS_INST:
+
     SSAT_INST:
     {
         if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
@@ -5407,7 +5906,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
         FETCH_INST;
         GOTO_NEXT_INST;
     }
-    SSUB8_INST:
+
     STC_INST:
     {
         // Instruction not implemented
@@ -5580,46 +6079,96 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
     }
     STREX_INST:
     {
-        ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
+        generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+        // STREX: conditionally store the 32-bit value of Rm at [Rn]; Rd receives 0 if stored, 1 if not.
         if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-            addr = cpu->Reg[BITS(inst_cream->inst, 16, 19)];
-            unsigned int value = cpu->Reg[BITS(inst_cream->inst, 0, 3)];
+            unsigned int write_addr = cpu->Reg[inst_cream->Rn];  // target address is taken from Rn
 
-            int dest_reg = BITS(inst_cream->inst, 12, 15);
-            if((exclusive_detect(cpu, addr) == 0) && (cpu->exclusive_state == 1)){
-                remove_exclusive(cpu, addr);
-                cpu->Reg[dest_reg] = 0;
+            if ((exclusive_detect(cpu, write_addr) == 0) && (cpu->exclusive_state == 1)) {  // NOTE(review): presumably 0 means "address is tagged exclusive" - confirm
+                remove_exclusive(cpu, write_addr);  // the tag is consumed by a successful store
                 cpu->exclusive_state = 0;
 
-                Memory::Write32(addr, value);
+                Memory::Write32(write_addr, cpu->Reg[inst_cream->Rm]);
+                RD = 0;  // status 0: the store was performed
             } else {
                 // Failed to write due to mutex access
-                cpu->Reg[dest_reg] = 1;
+                RD = 1;  // status 1: the store was not performed
             }
         }
         cpu->Reg[15] += GET_INST_SIZE(cpu);
-        INC_PC(sizeof(ldst_inst));
+        INC_PC(sizeof(generic_arm_inst));  // must match the component type used above
         FETCH_INST;
         GOTO_NEXT_INST;
     }
     STREXB_INST:
     {
-        ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
+        generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+        // STREXB: conditionally store a byte from Rm at [Rn]; Rd receives 0 if stored, 1 if not.
         if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-            addr = cpu->Reg[BITS(inst_cream->inst, 16, 19)];
-            unsigned int value = cpu->Reg[BITS(inst_cream->inst, 0, 3)] & 0xff;
-            int dest_reg = BITS(inst_cream->inst, 12, 15);
-            if((exclusive_detect(cpu, addr) == 0) && (cpu->exclusive_state == 1)){
-                remove_exclusive(cpu, addr);
-                cpu->Reg[dest_reg] = 0;
+            unsigned int write_addr = cpu->Reg[inst_cream->Rn];  // target address is taken from Rn
+
+            if ((exclusive_detect(cpu, write_addr) == 0) && (cpu->exclusive_state == 1)) {  // NOTE(review): presumably 0 means "address is tagged exclusive" - confirm
+                remove_exclusive(cpu, write_addr);  // the tag is consumed by a successful store
                 cpu->exclusive_state = 0;
-                Memory::Write8(addr, value);
+
+                Memory::Write8(write_addr, cpu->Reg[inst_cream->Rm]);  // NOTE(review): the explicit "& 0xff" was dropped; presumably Write8 truncates to 8 bits - confirm
+                RD = 0;  // status 0: the store was performed
             } else {
-                cpu->Reg[dest_reg] = 1;
+                // Failed to write due to mutex access
+                RD = 1;  // status 1: the store was not performed
             }
         }
         cpu->Reg[15] += GET_INST_SIZE(cpu);
-        INC_PC(sizeof(ldst_inst));
+        INC_PC(sizeof(generic_arm_inst));  // must match the component type used above
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STREXD_INST:
+    {
+        generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+        // STREXD: conditionally store the register pair Rm, Rm+1 at [Rn] and [Rn + 4]; Rd receives 0 if stored, 1 if not.
+        if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+            unsigned int write_addr = cpu->Reg[inst_cream->Rn];  // target address is taken from Rn
+
+            if ((exclusive_detect(cpu, write_addr) == 0) && (cpu->exclusive_state == 1)) {  // NOTE(review): presumably 0 means "address is tagged exclusive" - confirm
+                remove_exclusive(cpu, write_addr);
+                cpu->exclusive_state = 0;
+                // TODO(bunnei): Remove exclusive from [write_addr + 4] if we implement this in LDREXD
+
+                Memory::Write32(write_addr, cpu->Reg[inst_cream->Rm]);  // low word comes from Rm
+                Memory::Write32(write_addr + 4, cpu->Reg[inst_cream->Rm + 1]);  // NOTE(review): Rm + 1 is not range-checked here; presumably the decoder guarantees a valid pair - confirm
+                RD = 0;  // status 0: both words were stored
+            }
+            else {
+                // Failed to write due to mutex access
+                RD = 1;  // status 1: nothing was stored
+            }
+        }
+        cpu->Reg[15] += GET_INST_SIZE(cpu);
+        INC_PC(sizeof(generic_arm_inst));  // must match the component type used above
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+    STREXH_INST:
+    {
+        generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component;
+        // STREXH: conditionally store a halfword from Rm at [Rn]; Rd receives 0 if stored, 1 if not.
+        if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+            unsigned int write_addr = cpu->Reg[inst_cream->Rn];  // target address is taken from Rn
+
+            if ((exclusive_detect(cpu, write_addr) == 0) && (cpu->exclusive_state == 1)) {  // NOTE(review): presumably 0 means "address is tagged exclusive" - confirm
+                remove_exclusive(cpu, write_addr);  // the tag is consumed by a successful store
+                cpu->exclusive_state = 0;
+
+                Memory::Write16(write_addr, cpu->Reg[inst_cream->Rm]);  // NOTE(review): presumably Write16 truncates the register to 16 bits - confirm
+                RD = 0;  // status 0: the store was performed
+            } else {
+                // Failed to write due to mutex access
+                RD = 1;  // status 1: the store was not performed
+            }
+        }
+        cpu->Reg[15] += GET_INST_SIZE(cpu);
+        INC_PC(sizeof(generic_arm_inst));  // must match the component type used above
         FETCH_INST;
         GOTO_NEXT_INST;
     }
@@ -5741,7 +6290,40 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
         FETCH_INST;
         GOTO_NEXT_INST;
     }
+
     SXTAB16_INST:
+    SXTB16_INST:
+    {  // Shared handler: sign-extend bytes 0 and 2 of the rotated Rm to halfwords, optionally adding the halfwords of Rn.
+        if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
+            sxtab_inst* const inst_cream = (sxtab_inst*)inst_base->component;
+
+            const u8 rotation = inst_cream->rotate * 8;  // rotate field scaled by 8 to get a bit count
+            u32 rm_val = RM;
+            u32 rn_val = RN;
+
+            if (rotation)  // a zero rotation is skipped: (rm_val << 32) would be an undefined shift
+                rm_val = ((rm_val << (32 - rotation)) | (rm_val >> rotation));
+
+            // SXTB16 (Rn == 15): no accumulate operand, just pack the two sign-extended bytes.
+            if (inst_cream->Rn == 15) {
+                u32 lo = (u32)(s8)rm_val;
+                u32 hi = (u32)(s8)(rm_val >> 16);
+                RD = ((lo & 0xFFFF) | (hi << 16));  // mask lo so its 32-bit sign extension cannot overwrite the upper halfword
+            }
+            // SXTAB16: add each sign-extended byte to the matching halfword of Rn.
+            else {
+                u32 lo = (rn_val & 0xFFFF) + (u32)(s8)(rm_val & 0xFF);
+                u32 hi = ((rn_val >> 16) & 0xFFFF) + (u32)(s8)((rm_val >> 16) & 0xFF);
+                RD = ((lo & 0xFFFF) | (hi << 16));  // mask lo so a carry/borrow out of bit 15 cannot leak into the upper halfword
+            }
+        }
+
+        cpu->Reg[15] += GET_INST_SIZE(cpu);
+        INC_PC(sizeof(sxtab_inst));
+        FETCH_INST;
+        GOTO_NEXT_INST;
+    }
+
     SXTAH_INST:
     {
         sxtah_inst *inst_cream = (sxtah_inst *)inst_base->component;
@@ -5760,7 +6342,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
         FETCH_INST;
         GOTO_NEXT_INST;
     }
-    SXTB16_INST:
+
     TEQ_INST:
     {
         if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
@@ -6156,7 +6738,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
     BLX_1_THUMB:
     {
         // BLX 1 for armv5t and above
-        uint32 tmp = cpu->Reg[15];
+        u32 tmp = cpu->Reg[15];
         blx_1_thumb *inst_cream = (blx_1_thumb *)inst_base->component;
         cpu->Reg[15] = (cpu->Reg[14] + inst_cream->imm) & 0xFFFFFFFC;
         cpu->Reg[14] = ((tmp + 2) | 1);
@@ -6354,9 +6936,6 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
     }
     INIT_INST_LENGTH:
     {
-#if defined __GNUC__ || defined __clang__
-        InterpreterInitInstLength((unsigned long long int *)InstLabel, sizeof(InstLabel));
-#endif
         cpu->NumInstrsToExecute = 0;
         return num_instrs;
     }
diff --git a/src/core/arm/dyncom/arm_dyncom_run.h b/src/core/arm/dyncom/arm_dyncom_run.h
index aeabeac16..c70522274 100644
--- a/src/core/arm/dyncom/arm_dyncom_run.h
+++ b/src/core/arm/dyncom/arm_dyncom_run.h
@@ -24,8 +24,8 @@
 void switch_mode(arm_core_t *core, uint32_t mode);
 
 /* FIXME, we temporarily think thumb instruction is always 16 bit */
-static inline uint32 GET_INST_SIZE(arm_core_t* core){
-	return core->TFlag? 2 : 4;
+static inline u32 GET_INST_SIZE(arm_core_t* core) {
+    return core->TFlag? 2 : 4;
 }
 
 /**
@@ -36,8 +36,8 @@ static inline uint32 GET_INST_SIZE(arm_core_t* core){
 *
 * @return 
 */
-static inline addr_t CHECK_READ_REG15_WA(arm_core_t* core, int Rn){
-	return (Rn == 15)? ((core->Reg[15] & ~0x3) + GET_INST_SIZE(core) * 2) : core->Reg[Rn];
+static inline addr_t CHECK_READ_REG15_WA(arm_core_t* core, int Rn) {
+    return (Rn == 15)? ((core->Reg[15] & ~0x3) + GET_INST_SIZE(core) * 2) : core->Reg[Rn];
 }
 
 /**
@@ -48,8 +48,8 @@ static inline addr_t CHECK_READ_REG15_WA(arm_core_t* core, int Rn){
 *
 * @return 
 */
-static inline uint32 CHECK_READ_REG15(arm_core_t* core, int Rn){
-	return (Rn == 15)? ((core->Reg[15] & ~0x1) + GET_INST_SIZE(core) * 2) : core->Reg[Rn];
+static inline u32 CHECK_READ_REG15(arm_core_t* core, int Rn) {
+    return (Rn == 15)? ((core->Reg[15] & ~0x1) + GET_INST_SIZE(core) * 2) : core->Reg[Rn];
 }
 
 #endif
diff --git a/src/core/arm/dyncom/arm_dyncom_thumb.h b/src/core/arm/dyncom/arm_dyncom_thumb.h
index 5541de9d1..bf69b2fd4 100644
--- a/src/core/arm/dyncom/arm_dyncom_thumb.h
+++ b/src/core/arm/dyncom/arm_dyncom_thumb.h
@@ -37,10 +37,10 @@ enum tdstate {
     t_uninitialized,
 };
 
-tdstate
-thumb_translate(addr_t addr, uint32_t instr, uint32_t* ainstr, uint32_t* inst_size);
-static inline uint32 get_thumb_instr(uint32 instr, addr_t pc){
-    uint32 tinstr;
+tdstate thumb_translate(addr_t addr, u32 instr, u32* ainstr, u32* inst_size);
+
+static inline u32 get_thumb_instr(u32 instr, addr_t pc) {
+    u32 tinstr;
     if ((pc & 0x3) != 0)
         tinstr = instr >> 16;
     else
diff --git a/src/core/arm/interpreter/armemu.cpp b/src/core/arm/interpreter/armemu.cpp
index b9c2aa6c2..12166bf79 100644
--- a/src/core/arm/interpreter/armemu.cpp
+++ b/src/core/arm/interpreter/armemu.cpp
@@ -5881,67 +5881,45 @@ L_stm_s_takeabort:
                 const u32 rm_val = state->Reg[rm_idx];
                 const u32 rn_val = state->Reg[rn_idx];
 
-                u8 lo_val1;
-                u8 lo_val2;
-                u8 hi_val1;
-                u8 hi_val2;
+                s32 lo_val1, lo_val2;
+                s32 hi_val1, hi_val2;
 
                 // SADD8
                 if ((instr & 0xFF0) == 0xf90) {
-                    lo_val1 = (u8)((rn_val & 0xFF) + (rm_val & 0xFF));
-                    lo_val2 = (u8)(((rn_val >> 8) & 0xFF) + ((rm_val >> 8) & 0xFF));
-                    hi_val1 = (u8)(((rn_val >> 16) & 0xFF) + ((rm_val >> 16) & 0xFF));
-                    hi_val2 = (u8)(((rn_val >> 24) & 0xFF) + ((rm_val >> 24) & 0xFF));
-
-                    if (lo_val1 & 0x80)
-                        state->GEFlag |= (1 << 16);
-                    else
-                        state->GEFlag &= ~(1 << 16);
-
-                    if (lo_val2 & 0x80)
-                        state->GEFlag |= (1 << 17);
-                    else
-                        state->GEFlag &= ~(1 << 17);
-
-                    if (hi_val1 & 0x80)
-                        state->GEFlag |= (1 << 18);
-                    else
-                        state->GEFlag &= ~(1 << 18);
-
-                    if (hi_val2 & 0x80)
-                        state->GEFlag |= (1 << 19);
-                    else
-                        state->GEFlag &= ~(1 << 19);
+                    lo_val1 = (s32)(s8)(rn_val & 0xFF) + (s32)(s8)(rm_val & 0xFF);
+                    lo_val2 = (s32)(s8)((rn_val >> 8) & 0xFF)  + (s32)(s8)((rm_val >> 8) & 0xFF);
+                    hi_val1 = (s32)(s8)((rn_val >> 16) & 0xFF) + (s32)(s8)((rm_val >> 16) & 0xFF);
+                    hi_val2 = (s32)(s8)((rn_val >> 24) & 0xFF) + (s32)(s8)((rm_val >> 24) & 0xFF);
                 }
                 // SSUB8
                 else {
-                    lo_val1 = (u8)((rn_val & 0xFF) - (rm_val & 0xFF));
-                    lo_val2 = (u8)(((rn_val >> 8) & 0xFF) - ((rm_val >> 8) & 0xFF));
-                    hi_val1 = (u8)(((rn_val >> 16) & 0xFF) - ((rm_val >> 16) & 0xFF));
-                    hi_val2 = (u8)(((rn_val >> 24) & 0xFF) - ((rm_val >> 24) & 0xFF));
+                    lo_val1 = (s32)(s8)(rn_val & 0xFF) - (s32)(s8)(rm_val & 0xFF);
+                    lo_val2 = (s32)(s8)((rn_val >> 8) & 0xFF)  - (s32)(s8)((rm_val >> 8) & 0xFF);
+                    hi_val1 = (s32)(s8)((rn_val >> 16) & 0xFF) - (s32)(s8)((rm_val >> 16) & 0xFF);
+                    hi_val2 = (s32)(s8)((rn_val >> 24) & 0xFF) - (s32)(s8)((rm_val >> 24) & 0xFF);
+                }
 
-                    if (!(lo_val1 & 0x80))
-                        state->GEFlag |= (1 << 16);
-                    else
-                        state->GEFlag &= ~(1 << 16);
+                if (lo_val1 >= 0)
+                    state->GEFlag |= (1 << 16);
+                else
+                    state->GEFlag &= ~(1 << 16);
 
-                    if (!(lo_val2 & 0x80))
-                        state->GEFlag |= (1 << 17);
-                    else
-                        state->GEFlag &= ~(1 << 17);
+                if (lo_val2 >= 0)
+                    state->GEFlag |= (1 << 17);
+                else
+                    state->GEFlag &= ~(1 << 17);
 
-                    if (!(hi_val1 & 0x80))
-                        state->GEFlag |= (1 << 18);
-                    else
-                        state->GEFlag &= ~(1 << 18);
+                if (hi_val1 >= 0)
+                    state->GEFlag |= (1 << 18);
+                else
+                    state->GEFlag &= ~(1 << 18);
 
-                    if (!(hi_val2 & 0x80))
-                        state->GEFlag |= (1 << 19);
-                    else
-                        state->GEFlag &= ~(1 << 19);
-                }
+                if (hi_val2 >= 0)
+                    state->GEFlag |= (1 << 19);
+                else
+                    state->GEFlag &= ~(1 << 19);
 
-                state->Reg[rd_idx] = (lo_val1 | lo_val2 << 8 | hi_val1 << 16 | hi_val2 << 24);
+                state->Reg[rd_idx] = ((lo_val1 & 0xFF) | ((lo_val2 & 0xFF) << 8) | ((hi_val1 & 0xFF) << 16) | ((hi_val2 & 0xFF) << 24));
                 return 1;
             }
             else {
@@ -6492,17 +6470,23 @@ L_stm_s_takeabort:
 
                     if (BITS(12, 15) != 15) {
                         state->Reg[rd_idx] += state->Reg[ra_idx];
-                        ARMul_AddOverflowQ(state, product1 + product2, state->Reg[ra_idx]);
+                        if (ARMul_AddOverflowQ(product1 + product2, state->Reg[ra_idx]))
+                            SETQ;
                     }
 
-                    ARMul_AddOverflowQ(state, product1, product2);
+                    if (ARMul_AddOverflowQ(product1, product2))
+                        SETQ;
                 }
                 // SMUSD and SMLSD
                 else {
                     state->Reg[rd_idx] = product1 - product2;
-                    
-                    if (BITS(12, 15) != 15)
+
+                    if (BITS(12, 15) != 15) {
                         state->Reg[rd_idx] += state->Reg[ra_idx];
+
+                        if (ARMul_AddOverflowQ(product1 - product2, state->Reg[ra_idx]))
+                            SETQ;
+                    }
                 }
 
                 return 1;
diff --git a/src/core/arm/interpreter/armsupp.cpp b/src/core/arm/interpreter/armsupp.cpp
index 426b67831..68ac2a0ce 100644
--- a/src/core/arm/interpreter/armsupp.cpp
+++ b/src/core/arm/interpreter/armsupp.cpp
@@ -418,22 +418,18 @@ ARMul_NegZero (ARMul_State * state, ARMword result)
     }
 }
 
-/* Compute whether an addition of A and B, giving RESULT, overflowed.  */
-
-int
-AddOverflow (ARMword a, ARMword b, ARMword result)
+// Compute whether an addition of A and B, giving RESULT, overflowed.
+bool AddOverflow(ARMword a, ARMword b, ARMword result)
 {
-    return ((NEG (a) && NEG (b) && POS (result))
-            || (POS (a) && POS (b) && NEG (result)));
+    return ((NEG(a) && NEG(b) && POS(result)) ||
+            (POS(a) && POS(b) && NEG(result)));
 }
 
-/* Compute whether a subtraction of A and B, giving RESULT, overflowed.  */
-
-int
-SubOverflow (ARMword a, ARMword b, ARMword result)
+// Compute whether a subtraction of A and B, giving RESULT, overflowed.
+bool SubOverflow(ARMword a, ARMword b, ARMword result)
 {
-    return ((NEG (a) && POS (b) && POS (result))
-            || (POS (a) && NEG (b) && NEG (result)));
+    return ((NEG(a) && POS(b) && POS(result)) ||
+            (POS(a) && NEG(b) && NEG(result)));
 }
 
 /* Assigns the C flag after an addition of a and b to give result.  */
@@ -453,12 +449,14 @@ ARMul_AddOverflow (ARMul_State * state, ARMword a, ARMword b, ARMword result)
     ASSIGNV (AddOverflow (a, b, result));
 }
 
-/* Assigns the Q flag if the given result is considered an overflow from the addition of a and b  */
-void ARMul_AddOverflowQ(ARMul_State* state, ARMword a, ARMword b)
+// Returns true if a + b overflows as a signed 32-bit addition; the caller is now responsible for setting the Q flag.
+bool ARMul_AddOverflowQ(ARMword a, ARMword b)
 {
     u32 result = a + b;
     if (((result ^ a) & (u32)0x80000000) && ((a ^ b) & (u32)0x80000000) == 0)
-        SETQ;
+        return true;  // a and b share a sign bit, but the result's sign bit differs from it
+
+    return false;
 }
 
 /* Assigns the C flag after an subtraction of a and b to give result.  */
diff --git a/src/core/arm/skyeye_common/armdefs.h b/src/core/arm/skyeye_common/armdefs.h
index 8611d7392..3100d7adc 100644
--- a/src/core/arm/skyeye_common/armdefs.h
+++ b/src/core/arm/skyeye_common/armdefs.h
@@ -70,6 +70,9 @@
 #define DATACACHE  1
 #define INSTCACHE  2
 
+#define POS(i) ( (~(i)) >> 31 )
+#define NEG(i) ( (i) >> 31 )
+
 #ifndef __STDC__
 typedef char *VoidStar;
 #endif
@@ -783,6 +786,8 @@ RUn %x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x\n",\
 //#define PXA250      0x69052903
 // 0x69052903;  //PXA250 B1 from intel 278522-001.pdf
 
+extern bool AddOverflow(ARMword, ARMword, ARMword);
+extern bool SubOverflow(ARMword, ARMword, ARMword);
 
 extern void ARMul_UndefInstr(ARMul_State*, ARMword);
 extern void ARMul_FixCPSR(ARMul_State*, ARMword, ARMword);
@@ -790,6 +795,8 @@ extern void ARMul_FixSPSR(ARMul_State*, ARMword, ARMword);
 extern void ARMul_ConsolePrint(ARMul_State*, const char*, ...);
 extern void ARMul_SelectProcessor(ARMul_State*, unsigned);
 
+extern bool ARMul_AddOverflowQ(ARMword, ARMword);
+
 extern u8 ARMul_SignedSaturatedAdd8(u8, u8);
 extern u8 ARMul_SignedSaturatedSub8(u8, u8);
 extern u16 ARMul_SignedSaturatedAdd16(u16, u16);
diff --git a/src/core/arm/skyeye_common/armemu.h b/src/core/arm/skyeye_common/armemu.h
index 3ea14b5a3..1dfcc635a 100644
--- a/src/core/arm/skyeye_common/armemu.h
+++ b/src/core/arm/skyeye_common/armemu.h
@@ -42,9 +42,6 @@
 #define R15FBIT (1L << 26)
 #define R15IFBITS (3L << 26)
 
-#define POS(i) ( (~(i)) >> 31 )
-#define NEG(i) ( (i) >> 31 )
-
 #ifdef MODET			/* Thumb support.  */
 /* ??? This bit is actually in the low order bit of the PC in the hardware.
    It isn't clear if the simulator needs to model that or not.  */
@@ -561,8 +558,7 @@ tdstate;
 
 /* Prototypes for exported functions.  */
 extern unsigned ARMul_NthReg (ARMword, unsigned);
-extern int AddOverflow (ARMword, ARMword, ARMword);
-extern int SubOverflow (ARMword, ARMword, ARMword);
+
 /* Prototypes for exported functions.  */
 #ifdef __cplusplus
  extern "C" {
@@ -602,7 +598,6 @@ extern ARMword ARMul_SwitchMode (ARMul_State *, ARMword, ARMword);
 extern void ARMul_MSRCpsr (ARMul_State *, ARMword, ARMword);
 extern void ARMul_SubOverflow (ARMul_State *, ARMword, ARMword, ARMword);
 extern void ARMul_AddOverflow (ARMul_State *, ARMword, ARMword, ARMword);
-extern void ARMul_AddOverflowQ(ARMul_State*, ARMword, ARMword);
 extern void ARMul_SubCarry (ARMul_State *, ARMword, ARMword, ARMword);
 extern void ARMul_AddCarry (ARMul_State *, ARMword, ARMword, ARMword);
 extern tdstate ARMul_ThumbDecode (ARMul_State *, ARMword, ARMword, ARMword *);
diff --git a/src/core/arm/skyeye_common/skyeye_types.h b/src/core/arm/skyeye_common/skyeye_types.h
index e7f022f19..fc7d8d922 100644
--- a/src/core/arm/skyeye_common/skyeye_types.h
+++ b/src/core/arm/skyeye_common/skyeye_types.h
@@ -22,34 +22,10 @@
  * 12/16/2006   Michael.Kang  <blackfin.kang@gmail.com>
  */
 
-#ifndef __SKYEYE_TYPES_H
-#define __SKYEYE_TYPES_H
+#pragma once
 
-#include <stdint.h>
-
-/*default machine word length */
-
-#ifndef __BEOS__
-/* To avoid the type conflict with the qemu */
-#ifndef QEMU
-typedef uint8_t uint8;
-typedef uint16_t uint16;
-typedef uint32_t uint32;
-typedef uint64_t uint64;
-
-typedef int8_t sint8;
-typedef int16_t sint16;
-typedef int32_t sint32;
-typedef int64_t sint64;
-#endif
+#include <cstdint>
 
 typedef uint32_t address_t;
-typedef uint32_t uinteger_t;
-typedef int32_t integer_t;
-
 typedef uint32_t physical_address_t;
-typedef uint32_t generic_address_t; 
-
-#endif
-
-#endif
+typedef uint32_t generic_address_t;
diff --git a/src/core/arm/skyeye_common/vfp/vfpinstr.cpp b/src/core/arm/skyeye_common/vfp/vfpinstr.cpp
index b5fcbac86..2c1218c30 100644
--- a/src/core/arm/skyeye_common/vfp/vfpinstr.cpp
+++ b/src/core/arm/skyeye_common/vfp/vfpinstr.cpp
@@ -1,22 +1,6 @@
-/*
-    vfp/vfpinstr.c - ARM VFPv3 emulation unit - Individual instructions data
-    Copyright (C) 2003 Skyeye Develop Group
-    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
+// Copyright 2012 Michael Kang, 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
 
 /* Notice: this file should not be compiled as is, and is meant to be
    included in other files only. */
@@ -30,52 +14,52 @@
 /* cond 1110 0D00 Vn-- Vd-- 101X N0M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmla_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vmla_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmla)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmla_inst));
-	vmla_inst *inst_cream = (vmla_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmla_inst));
+    vmla_inst *inst_cream = (vmla_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMLA_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VMLA :\n");
-		
-		vmla_inst *inst_cream = (vmla_inst *)inst_base->component;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VMLA :\n");
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        vmla_inst *inst_cream = (vmla_inst *)inst_base->component;
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmla_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmla_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -85,66 +69,66 @@ DYNCOM_FILL_ACTION(vmla),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmla)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmla)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int m;
-	int n;
-	int d ;
-	int add = (BIT(6) == 0);
-	int s = BIT(8) == 0;
-	Value *mm;
-	Value *nn;
-	Value *tmp;
-	if(s){
-		m = BIT(5) | BITS(0,3) << 1;
-		n = BIT(7) | BITS(16,19) << 1;
-		d = BIT(22) | BITS(12,15) << 1;
-		mm = FR32(m);
-		nn = FR32(n);
-		tmp = FPMUL(nn,mm);
-		if(!add)
-			tmp = FPNEG32(tmp);
-		mm = FR32(d);
-		tmp = FPADD(mm,tmp);
-		//LETS(d,tmp);
-		LETFPS(d,tmp);
-	}else {
-		m = BITS(0,3) | BIT(5) << 4;
-		n = BITS(16,19) | BIT(7) << 4;
-		d = BIT(22) << 4 | BITS(12,15);
-		//mm = SITOFP(32,RSPR(m));
-		//LETS(d,tmp);
-		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
-		tmp = OR(SHL(nn,CONST64(32)),mm);
-		mm = FPBITCAST64(tmp);
-		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
-		nn = OR(SHL(nn,CONST64(32)),tmp);
-		nn = FPBITCAST64(nn);
-		tmp = FPMUL(nn,mm);
-		if(!add)
-			tmp = FPNEG64(tmp);
-		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
-		mm = OR(SHL(nn,CONST64(32)),mm);
-		mm = FPBITCAST64(mm);
-		tmp = FPADD(mm,tmp);
-		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
-		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
-		LETFPS(2*d ,FPBITCAST32(nn));
-		LETFPS(d*2 + 1 , FPBITCAST32(mm));
-	}
-	return No_exp;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int m;
+    int n;
+    int d ;
+    int add = (BIT(6) == 0);
+    int s = BIT(8) == 0;
+    Value *mm;
+    Value *nn;
+    Value *tmp;
+    if(s){
+        m = BIT(5) | BITS(0,3) << 1;
+        n = BIT(7) | BITS(16,19) << 1;
+        d = BIT(22) | BITS(12,15) << 1;
+        mm = FR32(m);
+        nn = FR32(n);
+        tmp = FPMUL(nn,mm);
+        if(!add)
+            tmp = FPNEG32(tmp);
+        mm = FR32(d);
+        tmp = FPADD(mm,tmp);
+        //LETS(d,tmp);
+        LETFPS(d,tmp);
+    }else {
+        m = BITS(0,3) | BIT(5) << 4;
+        n = BITS(16,19) | BIT(7) << 4;
+        d = BIT(22) << 4 | BITS(12,15);
+        //mm = SITOFP(32,RSPR(m));
+        //LETS(d,tmp);
+        mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+        tmp = OR(SHL(nn,CONST64(32)),mm);
+        mm = FPBITCAST64(tmp);
+        tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+        nn = OR(SHL(nn,CONST64(32)),tmp);
+        nn = FPBITCAST64(nn);
+        tmp = FPMUL(nn,mm);
+        if(!add)
+            tmp = FPNEG64(tmp);
+        mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+        mm = OR(SHL(nn,CONST64(32)),mm);
+        mm = FPBITCAST64(mm);
+        tmp = FPADD(mm,tmp);
+        mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+        nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));    
+        LETFPS(2*d ,FPBITCAST32(nn));
+        LETFPS(d*2 + 1 , FPBITCAST32(mm));
+    }
+    return No_exp;
 }
 #endif
 
@@ -153,52 +137,52 @@ int DYNCOM_TRANS(vmla)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 0D00 Vn-- Vd-- 101X N1M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmls_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vmls_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmls)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmls_inst));
-	vmls_inst *inst_cream = (vmls_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmls_inst));
+    vmls_inst *inst_cream = (vmls_inst *)inst_base->component;
+
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMLS_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VMLS :\n");
-		
-		vmls_inst *inst_cream = (vmls_inst *)inst_base->component;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        DBG("VMLS :\n");
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmls_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        vmls_inst *inst_cream = (vmls_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmls_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -208,66 +192,66 @@ DYNCOM_FILL_ACTION(vmls),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmls)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmls)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s VMLS instruction is executed out of here.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int m;
-	int n;
-	int d ;
-	int add = (BIT(6) == 0);
-	int s = BIT(8) == 0;
-	Value *mm;
-	Value *nn;
-	Value *tmp;
-	if(s){
-		m = BIT(5) | BITS(0,3) << 1;
-		n = BIT(7) | BITS(16,19) << 1;
-		d = BIT(22) | BITS(12,15) << 1;
-		mm = FR32(m);
-		nn = FR32(n);
-		tmp = FPMUL(nn,mm);
-		if(!add)
-			tmp = FPNEG32(tmp);
-		mm = FR32(d);
-		tmp = FPADD(mm,tmp);
-		//LETS(d,tmp);
-		LETFPS(d,tmp);
-	}else {
-		m = BITS(0,3) | BIT(5) << 4;
-		n = BITS(16,19) | BIT(7) << 4;
-		d = BIT(22) << 4 | BITS(12,15);
-		//mm = SITOFP(32,RSPR(m));
-		//LETS(d,tmp);
-		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
-		tmp = OR(SHL(nn,CONST64(32)),mm);
-		mm = FPBITCAST64(tmp);
-		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
-		nn = OR(SHL(nn,CONST64(32)),tmp);
-		nn = FPBITCAST64(nn);
-		tmp = FPMUL(nn,mm);
-		if(!add)
-			tmp = FPNEG64(tmp);
-		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
-		mm = OR(SHL(nn,CONST64(32)),mm);
-		mm = FPBITCAST64(mm);
-		tmp = FPADD(mm,tmp);
-		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
-		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
-		LETFPS(2*d ,FPBITCAST32(nn));
-		LETFPS(d*2 + 1 , FPBITCAST32(mm));
-	}	
-	return No_exp;
+    DBG("\t\tin %s VMLS instruction is executed out of here.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int m;
+    int n;
+    int d ;
+    int add = (BIT(6) == 0);
+    int s = BIT(8) == 0;
+    Value *mm;
+    Value *nn;
+    Value *tmp;
+    if(s){
+        m = BIT(5) | BITS(0,3) << 1;
+        n = BIT(7) | BITS(16,19) << 1;
+        d = BIT(22) | BITS(12,15) << 1;
+        mm = FR32(m);
+        nn = FR32(n);
+        tmp = FPMUL(nn,mm);
+        if(!add)
+            tmp = FPNEG32(tmp);
+        mm = FR32(d);
+        tmp = FPADD(mm,tmp);
+        //LETS(d,tmp);
+        LETFPS(d,tmp);
+    }else {
+        m = BITS(0,3) | BIT(5) << 4;
+        n = BITS(16,19) | BIT(7) << 4;
+        d = BIT(22) << 4 | BITS(12,15);
+        //mm = SITOFP(32,RSPR(m));
+        //LETS(d,tmp);
+        mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+        tmp = OR(SHL(nn,CONST64(32)),mm);
+        mm = FPBITCAST64(tmp);
+        tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+        nn = OR(SHL(nn,CONST64(32)),tmp);
+        nn = FPBITCAST64(nn);
+        tmp = FPMUL(nn,mm);
+        if(!add)
+            tmp = FPNEG64(tmp);
+        mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+        mm = OR(SHL(nn,CONST64(32)),mm);
+        mm = FPBITCAST64(mm);
+        tmp = FPADD(mm,tmp);
+        mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+        nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));    
+        LETFPS(2*d ,FPBITCAST32(nn));
+        LETFPS(d*2 + 1 , FPBITCAST32(mm));
+    }    
+    return No_exp;
 }
 #endif
 
@@ -276,52 +260,52 @@ int DYNCOM_TRANS(vmls)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 0D01 Vn-- Vd-- 101X N1M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vnmla_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vnmla_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vnmla)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vnmla_inst));
-	vnmla_inst *inst_cream = (vnmla_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vnmla_inst));
+    vnmla_inst *inst_cream = (vnmla_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VNMLA_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VNMLA :\n");
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VNMLA :\n");
+
+        vnmla_inst *inst_cream = (vnmla_inst *)inst_base->component;
 
-		vnmla_inst *inst_cream = (vnmla_inst *)inst_base->component;
+        int ret;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vnmla_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vnmla_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -332,66 +316,66 @@ DYNCOM_FILL_ACTION(vnmla),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vnmla)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vnmla)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s VNMLA instruction is executed out of here.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int m;
-	int n;
-	int d ;
-	int add = (BIT(6) == 0);
-	int s = BIT(8) == 0;
-	Value *mm;
-	Value *nn;
-	Value *tmp;
-	if(s){
-		m = BIT(5) | BITS(0,3) << 1;
-		n = BIT(7) | BITS(16,19) << 1;
-		d = BIT(22) | BITS(12,15) << 1;
-		mm = FR32(m);
-		nn = FR32(n);
-		tmp = FPMUL(nn,mm);
-		if(!add)
-			tmp = FPNEG32(tmp);
-		mm = FR32(d);
-		tmp = FPADD(FPNEG32(mm),tmp);
-		//LETS(d,tmp);
-		LETFPS(d,tmp);
-	}else {
-		m = BITS(0,3) | BIT(5) << 4;
-		n = BITS(16,19) | BIT(7) << 4;
-		d = BIT(22) << 4 | BITS(12,15);
-		//mm = SITOFP(32,RSPR(m));
-		//LETS(d,tmp);
-		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
-		tmp = OR(SHL(nn,CONST64(32)),mm);
-		mm = FPBITCAST64(tmp);
-		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
-		nn = OR(SHL(nn,CONST64(32)),tmp);
-		nn = FPBITCAST64(nn);
-		tmp = FPMUL(nn,mm);
-		if(!add)
-			tmp = FPNEG64(tmp);
-		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
-		mm = OR(SHL(nn,CONST64(32)),mm);
-		mm = FPBITCAST64(mm);
-		tmp = FPADD(FPNEG64(mm),tmp);
-		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));	
-		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));
-		LETFPS(2*d ,FPBITCAST32(nn));
-		LETFPS(d*2 + 1 , FPBITCAST32(mm));
-	}
-	return No_exp;
+    DBG("\t\tin %s VNMLA instruction is executed out of here.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int m;
+    int n;
+    int d ;
+    int add = (BIT(6) == 0);
+    int s = BIT(8) == 0;
+    Value *mm;
+    Value *nn;
+    Value *tmp;
+    if(s){
+        m = BIT(5) | BITS(0,3) << 1;
+        n = BIT(7) | BITS(16,19) << 1;
+        d = BIT(22) | BITS(12,15) << 1;
+        mm = FR32(m);
+        nn = FR32(n);
+        tmp = FPMUL(nn,mm);
+        if(!add)
+            tmp = FPNEG32(tmp);
+        mm = FR32(d);
+        tmp = FPADD(FPNEG32(mm),tmp);
+        //LETS(d,tmp);
+        LETFPS(d,tmp);
+    }else {
+        m = BITS(0,3) | BIT(5) << 4;
+        n = BITS(16,19) | BIT(7) << 4;
+        d = BIT(22) << 4 | BITS(12,15);
+        //mm = SITOFP(32,RSPR(m));
+        //LETS(d,tmp);
+        mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+        tmp = OR(SHL(nn,CONST64(32)),mm);
+        mm = FPBITCAST64(tmp);
+        tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+        nn = OR(SHL(nn,CONST64(32)),tmp);
+        nn = FPBITCAST64(nn);
+        tmp = FPMUL(nn,mm);
+        if(!add)
+            tmp = FPNEG64(tmp);
+        mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+        mm = OR(SHL(nn,CONST64(32)),mm);
+        mm = FPBITCAST64(mm);
+        tmp = FPADD(FPNEG64(mm),tmp);
+        mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));    
+        nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));
+        LETFPS(2*d ,FPBITCAST32(nn));
+        LETFPS(d*2 + 1 , FPBITCAST32(mm));
+    }
+    return No_exp;
 }
 #endif
 
@@ -401,52 +385,52 @@ int DYNCOM_TRANS(vnmla)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vnmls_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vnmls_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vnmls)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vnmls_inst));
-	vnmls_inst *inst_cream = (vnmls_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vnmls_inst));
+    vnmls_inst *inst_cream = (vnmls_inst *)inst_base->component;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VNMLS_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VNMLS :\n");
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VNMLS :\n");
+
+        vnmls_inst *inst_cream = (vnmls_inst *)inst_base->component;
 
-		vnmls_inst *inst_cream = (vnmls_inst *)inst_base->component;
+        int ret;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vnmls_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vnmls_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -456,66 +440,66 @@ DYNCOM_FILL_ACTION(vnmls),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vnmls)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vnmls)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int m;
-	int n;
-	int d ;
-	int add = (BIT(6) == 0);
-	int s = BIT(8) == 0;
-	Value *mm;
-	Value *nn;
-	Value *tmp;
-	if(s){
-		m = BIT(5) | BITS(0,3) << 1;
-		n = BIT(7) | BITS(16,19) << 1;
-		d = BIT(22) | BITS(12,15) << 1;
-		mm = FR32(m);
-		nn = FR32(n);
-		tmp = FPMUL(nn,mm);
-		if(!add)
-			tmp = FPNEG32(tmp);
-		mm = FR32(d);
-		tmp = FPADD(FPNEG32(mm),tmp);
-		//LETS(d,tmp);
-		LETFPS(d,tmp);
-	}else {
-		m = BITS(0,3) | BIT(5) << 4;
-		n = BITS(16,19) | BIT(7) << 4;
-		d = BIT(22) << 4 | BITS(12,15);
-		//mm = SITOFP(32,RSPR(m));
-		//LETS(d,tmp);
-		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
-		tmp = OR(SHL(nn,CONST64(32)),mm);
-		mm = FPBITCAST64(tmp);
-		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
-		nn = OR(SHL(nn,CONST64(32)),tmp);
-		nn = FPBITCAST64(nn);
-		tmp = FPMUL(nn,mm);
-		if(!add)
-			tmp = FPNEG64(tmp);
-		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
-		mm = OR(SHL(nn,CONST64(32)),mm);
-		mm = FPBITCAST64(mm);
-		tmp = FPADD(FPNEG64(mm),tmp);
-		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
-		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
-		LETFPS(2*d ,FPBITCAST32(nn));
-		LETFPS(d*2 + 1 , FPBITCAST32(mm));
-	}	
-	return No_exp;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int m;
+    int n;
+    int d ;
+    int add = (BIT(6) == 0);
+    int s = BIT(8) == 0;
+    Value *mm;
+    Value *nn;
+    Value *tmp;
+    if(s){
+        m = BIT(5) | BITS(0,3) << 1;
+        n = BIT(7) | BITS(16,19) << 1;
+        d = BIT(22) | BITS(12,15) << 1;
+        mm = FR32(m);
+        nn = FR32(n);
+        tmp = FPMUL(nn,mm);
+        if(!add)
+            tmp = FPNEG32(tmp);
+        mm = FR32(d);
+        tmp = FPADD(FPNEG32(mm),tmp);
+        //LETS(d,tmp);
+        LETFPS(d,tmp);
+    }else {
+        m = BITS(0,3) | BIT(5) << 4;
+        n = BITS(16,19) | BIT(7) << 4;
+        d = BIT(22) << 4 | BITS(12,15);
+        //mm = SITOFP(32,RSPR(m));
+        //LETS(d,tmp);
+        mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+        tmp = OR(SHL(nn,CONST64(32)),mm);
+        mm = FPBITCAST64(tmp);
+        tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+        nn = OR(SHL(nn,CONST64(32)),tmp);
+        nn = FPBITCAST64(nn);
+        tmp = FPMUL(nn,mm);
+        if(!add)
+            tmp = FPNEG64(tmp);
+        mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+        mm = OR(SHL(nn,CONST64(32)),mm);
+        mm = FPBITCAST64(mm);
+        tmp = FPADD(FPNEG64(mm),tmp);
+        mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+        nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));    
+        LETFPS(2*d ,FPBITCAST32(nn));
+        LETFPS(d*2 + 1 , FPBITCAST32(mm));
+    }    
+    return No_exp;
 }
 #endif
 
@@ -524,52 +508,52 @@ int DYNCOM_TRANS(vnmls)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 0D10 Vn-- Vd-- 101X N0M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vnmul_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vnmul_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vnmul)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vnmul_inst));
-	vnmul_inst *inst_cream = (vnmul_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vnmul_inst));
+    vnmul_inst *inst_cream = (vnmul_inst *)inst_base->component;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VNMUL_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VNMUL :\n");
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VNMUL :\n");
+
+        vnmul_inst *inst_cream = (vnmul_inst *)inst_base->component;
 
-		vnmul_inst *inst_cream = (vnmul_inst *)inst_base->component;
+        int ret;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vnmul_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vnmul_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -579,56 +563,56 @@ DYNCOM_FILL_ACTION(vnmul),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vnmul)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
-}		
+    int instr_size = INSTR_SIZE;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
+}        
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vnmul)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int m;
-	int n;
-	int d ;
-	int add = (BIT(6) == 0);
-	int s = BIT(8) == 0;
-	Value *mm;
-	Value *nn;
-	Value *tmp;
-	if(s){
-		m = BIT(5) | BITS(0,3) << 1;
-		n = BIT(7) | BITS(16,19) << 1;
-		d = BIT(22) | BITS(12,15) << 1;
-		mm = FR32(m);
-		nn = FR32(n);
-		tmp = FPMUL(nn,mm);
-		//LETS(d,tmp);
-		LETFPS(d,FPNEG32(tmp));
-	}else {
-		m = BITS(0,3) | BIT(5) << 4;
-		n = BITS(16,19) | BIT(7) << 4;
-		d = BIT(22) << 4 | BITS(12,15);
-		//mm = SITOFP(32,RSPR(m));
-		//LETS(d,tmp);
-		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
-		tmp = OR(SHL(nn,CONST64(32)),mm);
-		mm = FPBITCAST64(tmp);
-		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
-		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
-		nn = OR(SHL(nn,CONST64(32)),tmp);
-		nn = FPBITCAST64(nn);
-		tmp = FPMUL(nn,mm);
-		tmp = FPNEG64(tmp);
-		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
-		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
-		LETFPS(2*d ,FPBITCAST32(nn));
-		LETFPS(d*2 + 1 , FPBITCAST32(mm));
-	}
-	return No_exp;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int m;
+    int n;
+    int d ;
+    int add = (BIT(6) == 0);
+    int s = BIT(8) == 0;
+    Value *mm;
+    Value *nn;
+    Value *tmp;
+    if(s){
+        m = BIT(5) | BITS(0,3) << 1;
+        n = BIT(7) | BITS(16,19) << 1;
+        d = BIT(22) | BITS(12,15) << 1;
+        mm = FR32(m);
+        nn = FR32(n);
+        tmp = FPMUL(nn,mm);
+        //LETS(d,tmp);
+        LETFPS(d,FPNEG32(tmp));
+    }else {
+        m = BITS(0,3) | BIT(5) << 4;
+        n = BITS(16,19) | BIT(7) << 4;
+        d = BIT(22) << 4 | BITS(12,15);
+        //mm = SITOFP(32,RSPR(m));
+        //LETS(d,tmp);
+        mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+        tmp = OR(SHL(nn,CONST64(32)),mm);
+        mm = FPBITCAST64(tmp);
+        tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+        nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+        nn = OR(SHL(nn,CONST64(32)),tmp);
+        nn = FPBITCAST64(nn);
+        tmp = FPMUL(nn,mm);
+        tmp = FPNEG64(tmp);
+        mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+        nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));    
+        LETFPS(2*d ,FPBITCAST32(nn));
+        LETFPS(d*2 + 1 , FPBITCAST32(mm));
+    }
+    return No_exp;
 }
 #endif
 
@@ -638,52 +622,52 @@ int DYNCOM_TRANS(vnmul)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 0D10 Vn-- Vd-- 101X N0M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmul_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vmul_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmul)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmul_inst));
-	vmul_inst *inst_cream = (vmul_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmul_inst));
+    vmul_inst *inst_cream = (vmul_inst *)inst_base->component;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMUL_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VMUL :\n");
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VMUL :\n");
+
+        vmul_inst *inst_cream = (vmul_inst *)inst_base->component;
 
-		vmul_inst *inst_cream = (vmul_inst *)inst_base->component;
+        int ret;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmul_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmul_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -693,70 +677,70 @@ DYNCOM_FILL_ACTION(vmul),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmul)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmul)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//printf("\n\n\t\tin %s instruction is executed out.\n\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int m;
-	int n;
-	int d ;
-	int s = BIT(8) == 0;
-	Value *mm;
-	Value *nn;
-	Value *tmp;
-	if(s){
-		m = BIT(5) | BITS(0,3) << 1;
-		n = BIT(7) | BITS(16,19) << 1;
-		d = BIT(22) | BITS(12,15) << 1;
-		//mm = SITOFP(32,FR(m));
-		//nn = SITOFP(32,FRn));
-		mm = FR32(m);
-		nn = FR32(n);
-		tmp = FPMUL(nn,mm);
-		//LETS(d,tmp);
-		LETFPS(d,tmp);
-	}else {
-		m = BITS(0,3) | BIT(5) << 4;
-		n = BITS(16,19) | BIT(7) << 4;
-		d = BIT(22) << 4 | BITS(12,15);
-		//mm = SITOFP(32,RSPR(m));
-		//LETS(d,tmp);
-		Value *lo = FR32(2 * m);
-		Value *hi = FR32(2 * m + 1);
-		hi = IBITCAST32(hi);
-		lo = IBITCAST32(lo);
-		Value *hi64 = ZEXT64(hi);
-		Value* lo64 = ZEXT64(lo);
-		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
-		Value* m0 = FPBITCAST64(v64);
-		lo = FR32(2 * n);
-		hi = FR32(2 * n + 1);
-		hi = IBITCAST32(hi);
-		lo = IBITCAST32(lo);
-		hi64 = ZEXT64(hi);
-		lo64 = ZEXT64(lo);
-		v64 = OR(SHL(hi64,CONST64(32)),lo64);
-		Value *n0 = FPBITCAST64(v64); 
-		tmp = FPMUL(n0,m0);
-		Value *val64 = IBITCAST64(tmp);
-		hi = LSHR(val64,CONST64(32));
-		lo = AND(val64,CONST64(0xffffffff));
-		hi = TRUNC32(hi);
-		lo  = TRUNC32(lo);
-		hi = FPBITCAST32(hi);
-		lo = FPBITCAST32(lo);		
-		LETFPS(2*d ,lo);
-		LETFPS(d*2 + 1 , hi);
-	}
-	return No_exp;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //printf("\n\n\t\tin %s instruction is executed out.\n\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int m;
+    int n;
+    int d ;
+    int s = BIT(8) == 0;
+    Value *mm;
+    Value *nn;
+    Value *tmp;
+    if(s){
+        m = BIT(5) | BITS(0,3) << 1;
+        n = BIT(7) | BITS(16,19) << 1;
+        d = BIT(22) | BITS(12,15) << 1;
+        //mm = SITOFP(32,FR(m));
+        //nn = SITOFP(32,FRn));
+        mm = FR32(m);
+        nn = FR32(n);
+        tmp = FPMUL(nn,mm);
+        //LETS(d,tmp);
+        LETFPS(d,tmp);
+    }else {
+        m = BITS(0,3) | BIT(5) << 4;
+        n = BITS(16,19) | BIT(7) << 4;
+        d = BIT(22) << 4 | BITS(12,15);
+        //mm = SITOFP(32,RSPR(m));
+        //LETS(d,tmp);
+        Value *lo = FR32(2 * m);
+        Value *hi = FR32(2 * m + 1);
+        hi = IBITCAST32(hi);
+        lo = IBITCAST32(lo);
+        Value *hi64 = ZEXT64(hi);
+        Value* lo64 = ZEXT64(lo);
+        Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+        Value* m0 = FPBITCAST64(v64);
+        lo = FR32(2 * n);
+        hi = FR32(2 * n + 1);
+        hi = IBITCAST32(hi);
+        lo = IBITCAST32(lo);
+        hi64 = ZEXT64(hi);
+        lo64 = ZEXT64(lo);
+        v64 = OR(SHL(hi64,CONST64(32)),lo64);
+        Value *n0 = FPBITCAST64(v64); 
+        tmp = FPMUL(n0,m0);
+        Value *val64 = IBITCAST64(tmp);
+        hi = LSHR(val64,CONST64(32));
+        lo = AND(val64,CONST64(0xffffffff));
+        hi = TRUNC32(hi);
+        lo  = TRUNC32(lo);
+        hi = FPBITCAST32(hi);
+        lo = FPBITCAST32(lo);        
+        LETFPS(2*d ,lo);
+        LETFPS(d*2 + 1 , hi);
+    }
+    return No_exp;
 }
 #endif
 
@@ -765,52 +749,52 @@ int DYNCOM_TRANS(vmul)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 0D11 Vn-- Vd-- 101X N0M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vadd_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vadd_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vadd)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vadd_inst));
-	vadd_inst *inst_cream = (vadd_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vadd_inst));
+    vadd_inst *inst_cream = (vadd_inst *)inst_base->component;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
+
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VADD_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VADD :\n");
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VADD :\n");
 
-		vadd_inst *inst_cream = (vadd_inst *)inst_base->component;
+        vadd_inst *inst_cream = (vadd_inst *)inst_base->component;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        int ret;
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vadd_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vadd_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -820,64 +804,64 @@ DYNCOM_FILL_ACTION(vadd),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vadd)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vadd)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction will implement out of JIT.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int m;
-	int n;
-	int d ;
-	int s = BIT(8) == 0;
-	Value *mm;
-	Value *nn;
-	Value *tmp;
-	if(s){
-		m = BIT(5) | BITS(0,3) << 1;
-		n = BIT(7) | BITS(16,19) << 1;
-		d = BIT(22) | BITS(12,15) << 1;
-		mm = FR32(m);
-		nn = FR32(n);
-		tmp = FPADD(nn,mm);
-		LETFPS(d,tmp);
-	}else {
-		m = BITS(0,3) | BIT(5) << 4;
-		n = BITS(16,19) | BIT(7) << 4;
-		d = BIT(22) << 4 | BITS(12,15);
-		Value *lo = FR32(2 * m);
-		Value *hi = FR32(2 * m + 1);
-		hi = IBITCAST32(hi);
-		lo = IBITCAST32(lo);
-		Value *hi64 = ZEXT64(hi);
-		Value* lo64 = ZEXT64(lo);
-		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
-		Value* m0 = FPBITCAST64(v64);
-		lo = FR32(2 * n);
-		hi = FR32(2 * n + 1);
-		hi = IBITCAST32(hi);
-		lo = IBITCAST32(lo);
-		hi64 = ZEXT64(hi);
-		lo64 = ZEXT64(lo);
-		v64 = OR(SHL(hi64,CONST64(32)),lo64);
-		Value *n0 = FPBITCAST64(v64); 
-		tmp = FPADD(n0,m0);
-		Value *val64 = IBITCAST64(tmp);
-		hi = LSHR(val64,CONST64(32));
-		lo = AND(val64,CONST64(0xffffffff));
-		hi = TRUNC32(hi);
-		lo  = TRUNC32(lo);
-		hi = FPBITCAST32(hi);
-		lo = FPBITCAST32(lo);		
-		LETFPS(2*d ,lo);
-		LETFPS(d*2 + 1 , hi);
-	}
-	return No_exp;
+    DBG("\t\tin %s instruction will implement out of JIT.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int m;
+    int n;
+    int d ;
+    int s = BIT(8) == 0;
+    Value *mm;
+    Value *nn;
+    Value *tmp;
+    if(s){
+        m = BIT(5) | BITS(0,3) << 1;
+        n = BIT(7) | BITS(16,19) << 1;
+        d = BIT(22) | BITS(12,15) << 1;
+        mm = FR32(m);
+        nn = FR32(n);
+        tmp = FPADD(nn,mm);
+        LETFPS(d,tmp);
+    }else {
+        m = BITS(0,3) | BIT(5) << 4;
+        n = BITS(16,19) | BIT(7) << 4;
+        d = BIT(22) << 4 | BITS(12,15);
+        Value *lo = FR32(2 * m);
+        Value *hi = FR32(2 * m + 1);
+        hi = IBITCAST32(hi);
+        lo = IBITCAST32(lo);
+        Value *hi64 = ZEXT64(hi);
+        Value* lo64 = ZEXT64(lo);
+        Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+        Value* m0 = FPBITCAST64(v64);
+        lo = FR32(2 * n);
+        hi = FR32(2 * n + 1);
+        hi = IBITCAST32(hi);
+        lo = IBITCAST32(lo);
+        hi64 = ZEXT64(hi);
+        lo64 = ZEXT64(lo);
+        v64 = OR(SHL(hi64,CONST64(32)),lo64);
+        Value *n0 = FPBITCAST64(v64); 
+        tmp = FPADD(n0,m0);
+        Value *val64 = IBITCAST64(tmp);
+        hi = LSHR(val64,CONST64(32));
+        lo = AND(val64,CONST64(0xffffffff));
+        hi = TRUNC32(hi);
+        lo  = TRUNC32(lo);
+        hi = FPBITCAST32(hi);
+        lo = FPBITCAST32(lo);        
+        LETFPS(2*d ,lo);
+        LETFPS(d*2 + 1 , hi);
+    }
+    return No_exp;
 }
 #endif
 
@@ -886,52 +870,52 @@ int DYNCOM_TRANS(vadd)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 0D11 Vn-- Vd-- 101X N1M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vsub_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vsub_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vsub)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vsub_inst));
-	vsub_inst *inst_cream = (vsub_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vsub_inst));
+    vsub_inst *inst_cream = (vsub_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VSUB_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VSUB :\n");
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VSUB :\n");
 
-		vsub_inst *inst_cream = (vsub_inst *)inst_base->component;
+        vsub_inst *inst_cream = (vsub_inst *)inst_base->component;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        int ret;
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vsub_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vsub_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 #ifdef VFP_DYNCOM_TABLE
@@ -940,63 +924,63 @@ DYNCOM_FILL_ACTION(vsub),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vsub)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vsub)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instr=0x%x, instruction is executed out of JIT.\n", __FUNCTION__, instr);
-	//arch_arm_undef(cpu, bb, instr);
-	int m;
-	int n;
-	int d ;
-	int s = BIT(8) == 0;
-	Value *mm;
-	Value *nn;
-	Value *tmp;
-	if(s){
-		m = BIT(5) | BITS(0,3) << 1;
-		n = BIT(7) | BITS(16,19) << 1;
-		d = BIT(22) | BITS(12,15) << 1;
-		mm = FR32(m);
-		nn = FR32(n);
-		tmp = FPSUB(nn,mm);
-		LETFPS(d,tmp);
-	}else {
-		m = BITS(0,3) | BIT(5) << 4;
-		n = BITS(16,19) | BIT(7) << 4;
-		d = BIT(22) << 4 | BITS(12,15);
-		Value *lo = FR32(2 * m);
-		Value *hi = FR32(2 * m + 1);
-		hi = IBITCAST32(hi);
-		lo = IBITCAST32(lo);
-		Value *hi64 = ZEXT64(hi);
-		Value* lo64 = ZEXT64(lo);
-		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
-		Value* m0 = FPBITCAST64(v64);
-		lo = FR32(2 * n);
-		hi = FR32(2 * n + 1);
-		hi = IBITCAST32(hi);
-		lo = IBITCAST32(lo);
-		hi64 = ZEXT64(hi);
-		lo64 = ZEXT64(lo);
-		v64 = OR(SHL(hi64,CONST64(32)),lo64);
-		Value *n0 = FPBITCAST64(v64); 
-		tmp = FPSUB(n0,m0);
-		Value *val64 = IBITCAST64(tmp);
-		hi = LSHR(val64,CONST64(32));
-		lo = AND(val64,CONST64(0xffffffff));
-		hi = TRUNC32(hi);
-		lo  = TRUNC32(lo);
-		hi = FPBITCAST32(hi);
-		lo = FPBITCAST32(lo);		
-		LETFPS(2*d ,lo);
-		LETFPS(d*2 + 1 , hi);
-	} 
-	return No_exp;
+    DBG("\t\tin %s instr=0x%x, instruction is executed out of JIT.\n", __FUNCTION__, instr);
+    //arch_arm_undef(cpu, bb, instr);
+    int m;
+    int n;
+    int d ;
+    int s = BIT(8) == 0;
+    Value *mm;
+    Value *nn;
+    Value *tmp;
+    if(s){
+        m = BIT(5) | BITS(0,3) << 1;
+        n = BIT(7) | BITS(16,19) << 1;
+        d = BIT(22) | BITS(12,15) << 1;
+        mm = FR32(m);
+        nn = FR32(n);
+        tmp = FPSUB(nn,mm);
+        LETFPS(d,tmp);
+    }else {
+        m = BITS(0,3) | BIT(5) << 4;
+        n = BITS(16,19) | BIT(7) << 4;
+        d = BIT(22) << 4 | BITS(12,15);
+        Value *lo = FR32(2 * m);
+        Value *hi = FR32(2 * m + 1);
+        hi = IBITCAST32(hi);
+        lo = IBITCAST32(lo);
+        Value *hi64 = ZEXT64(hi);
+        Value* lo64 = ZEXT64(lo);
+        Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+        Value* m0 = FPBITCAST64(v64);
+        lo = FR32(2 * n);
+        hi = FR32(2 * n + 1);
+        hi = IBITCAST32(hi);
+        lo = IBITCAST32(lo);
+        hi64 = ZEXT64(hi);
+        lo64 = ZEXT64(lo);
+        v64 = OR(SHL(hi64,CONST64(32)),lo64);
+        Value *n0 = FPBITCAST64(v64); 
+        tmp = FPSUB(n0,m0);
+        Value *val64 = IBITCAST64(tmp);
+        hi = LSHR(val64,CONST64(32));
+        lo = AND(val64,CONST64(0xffffffff));
+        hi = TRUNC32(hi);
+        lo  = TRUNC32(lo);
+        hi = FPBITCAST32(hi);
+        lo = FPBITCAST32(lo);        
+        LETFPS(2*d ,lo);
+        LETFPS(d*2 + 1 , hi);
+    } 
+    return No_exp;
 }
 #endif
 
@@ -1005,52 +989,52 @@ int DYNCOM_TRANS(vsub)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 1D00 Vn-- Vd-- 101X N0M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vdiv_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vdiv_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vdiv)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vdiv_inst));
-	vdiv_inst *inst_cream = (vdiv_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vdiv_inst));
+    vdiv_inst *inst_cream = (vdiv_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VDIV_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VDIV :\n");
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VDIV :\n");
 
-		vdiv_inst *inst_cream = (vdiv_inst *)inst_base->component;
+        vdiv_inst *inst_cream = (vdiv_inst *)inst_base->component;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        int ret;
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vdiv_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vdiv_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -1060,64 +1044,64 @@ DYNCOM_FILL_ACTION(vdiv),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vdiv)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vdiv)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int m;
-	int n;
-	int d ;
-	int s = BIT(8) == 0;
-	Value *mm;
-	Value *nn;
-	Value *tmp;
-	if(s){
-		m = BIT(5) | BITS(0,3) << 1;
-		n = BIT(7) | BITS(16,19) << 1;
-		d = BIT(22) | BITS(12,15) << 1;
-		mm = FR32(m);
-		nn = FR32(n);
-		tmp = FPDIV(nn,mm);
-		LETFPS(d,tmp);
-	}else {
-		m = BITS(0,3) | BIT(5) << 4;
-		n = BITS(16,19) | BIT(7) << 4;
-		d = BIT(22) << 4 | BITS(12,15);
-		Value *lo = FR32(2 * m);
-		Value *hi = FR32(2 * m + 1);
-		hi = IBITCAST32(hi);
-		lo = IBITCAST32(lo);
-		Value *hi64 = ZEXT64(hi);
-		Value* lo64 = ZEXT64(lo);
-		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
-		Value* m0 = FPBITCAST64(v64);
-		lo = FR32(2 * n);
-		hi = FR32(2 * n + 1);
-		hi = IBITCAST32(hi);
-		lo = IBITCAST32(lo);
-		hi64 = ZEXT64(hi);
-		lo64 = ZEXT64(lo);
-		v64 = OR(SHL(hi64,CONST64(32)),lo64);
-		Value *n0 = FPBITCAST64(v64); 
-		tmp = FPDIV(n0,m0);
-		Value *val64 = IBITCAST64(tmp);
-		hi = LSHR(val64,CONST64(32));
-		lo = AND(val64,CONST64(0xffffffff));
-		hi = TRUNC32(hi);
-		lo  = TRUNC32(lo);
-		hi = FPBITCAST32(hi);
-		lo = FPBITCAST32(lo);		
-		LETFPS(2*d ,lo);
-		LETFPS(d*2 + 1 , hi);
-	} 		
-	return No_exp;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int m;
+    int n;
+    int d ;
+    int s = BIT(8) == 0;
+    Value *mm;
+    Value *nn;
+    Value *tmp;
+    if(s){
+        m = BIT(5) | BITS(0,3) << 1;
+        n = BIT(7) | BITS(16,19) << 1;
+        d = BIT(22) | BITS(12,15) << 1;
+        mm = FR32(m);
+        nn = FR32(n);
+        tmp = FPDIV(nn,mm);
+        LETFPS(d,tmp);
+    }else {
+        m = BITS(0,3) | BIT(5) << 4;
+        n = BITS(16,19) | BIT(7) << 4;
+        d = BIT(22) << 4 | BITS(12,15);
+        Value *lo = FR32(2 * m);
+        Value *hi = FR32(2 * m + 1);
+        hi = IBITCAST32(hi);
+        lo = IBITCAST32(lo);
+        Value *hi64 = ZEXT64(hi);
+        Value* lo64 = ZEXT64(lo);
+        Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+        Value* m0 = FPBITCAST64(v64);
+        lo = FR32(2 * n);
+        hi = FR32(2 * n + 1);
+        hi = IBITCAST32(hi);
+        lo = IBITCAST32(lo);
+        hi64 = ZEXT64(hi);
+        lo64 = ZEXT64(lo);
+        v64 = OR(SHL(hi64,CONST64(32)),lo64);
+        Value *n0 = FPBITCAST64(v64); 
+        tmp = FPDIV(n0,m0);
+        Value *val64 = IBITCAST64(tmp);
+        hi = LSHR(val64,CONST64(32));
+        lo = AND(val64,CONST64(0xffffffff));
+        hi = TRUNC32(hi);
+        lo  = TRUNC32(lo);
+        hi = FPBITCAST32(hi);
+        lo = FPBITCAST32(lo);        
+        LETFPS(2*d ,lo);
+        LETFPS(d*2 + 1 , hi);
+    }         
+    return No_exp;
 }
 #endif
 
@@ -1127,48 +1111,48 @@ int DYNCOM_TRANS(vdiv)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmovi_inst {
-	unsigned int single;
-	unsigned int d;
-	unsigned int imm;
+    unsigned int single;
+    unsigned int d;
+    unsigned int imm;
 } vmovi_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmovi)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovi_inst));
-	vmovi_inst *inst_cream = (vmovi_inst *)inst_base->component;
-
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
-	
-	inst_cream->single   = BIT(inst, 8) == 0;
-	inst_cream->d        = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4);
-	unsigned int imm8 = BITS(inst, 16, 19) << 4 | BITS(inst, 0, 3);
-	if (inst_cream->single)
-		inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0x1f : 0)<<25 | BITS(imm8, 0, 5)<<19;
-	else
-		inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0xff : 0)<<22 | BITS(imm8, 0, 5)<<16;
-	return inst_base;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovi_inst));
+    vmovi_inst *inst_cream = (vmovi_inst *)inst_base->component;
+
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
+
+    inst_cream->single   = BIT(inst, 8) == 0;
+    inst_cream->d        = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4);
+    unsigned int imm8 = BITS(inst, 16, 19) << 4 | BITS(inst, 0, 3);
+    if (inst_cream->single)
+        inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0x1f : 0)<<25 | BITS(imm8, 0, 5)<<19;
+    else
+        inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0xff : 0)<<22 | BITS(imm8, 0, 5)<<16;
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMOVI_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		vmovi_inst *inst_cream = (vmovi_inst *)inst_base->component;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
 
-		VMOVI(cpu, inst_cream->single, inst_cream->d, inst_cream->imm);
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmovi_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        vmovi_inst *inst_cream = (vmovi_inst *)inst_base->component;
+
+        VMOVI(cpu, inst_cream->single, inst_cream->d, inst_cream->imm);
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmovi_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -1178,37 +1162,37 @@ DYNCOM_FILL_ACTION(vmovi),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmovi)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmovi)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int single = (BIT(8) == 0);
-	int d;
-	int imm32;
-	Value *v;
-	Value *tmp;
-	v = CONST32(BITS(0,3) | BITS(16,19) << 4);
-	//v = CONST64(0x3ff0000000000000);
-	if(single){
-		d = BIT(22) | BITS(12,15) << 1;
-	}else {
-		d = BITS(12,15) | BIT(22) << 4;
-	}
-	if(single){
-		LETFPS(d,FPBITCAST32(v));
-	}else {
-		//v = UITOFP(64,v);
-		//tmp = IBITCAST64(v);
-		LETFPS(d*2 ,FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff)))));
-		LETFPS(d * 2 + 1,FPBITCAST32(TRUNC32(LSHR(v,CONST64(32)))));
-	}
-	return No_exp;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int single = (BIT(8) == 0);
+    int d;
+    int imm32;
+    Value *v;
+    Value *tmp;
+    v = CONST32(BITS(0,3) | BITS(16,19) << 4);
+    //v = CONST64(0x3ff0000000000000);
+    if(single){
+        d = BIT(22) | BITS(12,15) << 1;
+    }else {
+        d = BITS(12,15) | BIT(22) << 4;
+    }
+    if(single){
+        LETFPS(d,FPBITCAST32(v));
+    }else {
+        //v = UITOFP(64,v);
+        //tmp = IBITCAST64(v);
+        LETFPS(d*2 ,FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff)))));
+        LETFPS(d * 2 + 1,FPBITCAST32(TRUNC32(LSHR(v,CONST64(32)))));
+    }
+    return No_exp;
 }
 #endif
 
@@ -1218,45 +1202,44 @@ int DYNCOM_TRANS(vmovi)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmovr_inst {
-	unsigned int single;
-	unsigned int d;
-	unsigned int m;
+    unsigned int single;
+    unsigned int d;
+    unsigned int m;
 } vmovr_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmovr)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	VFP_DEBUG_UNTESTED(VMOVR);
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovr_inst));
-	vmovr_inst *inst_cream = (vmovr_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovr_inst));
+    vmovr_inst *inst_cream = (vmovr_inst *)inst_base->component;
+
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
-	
-	inst_cream->single   = BIT(inst, 8) == 0;
-	inst_cream->d        = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4);
-	inst_cream->m        = (inst_cream->single ? BITS(inst, 0, 3)<<1 | BIT(inst, 5) : BITS(inst, 0, 3) | BIT(inst, 5)<<4);
-	return inst_base;
+    inst_cream->single   = BIT(inst, 8) == 0;
+    inst_cream->d        = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4);
+    inst_cream->m        = (inst_cream->single ? BITS(inst, 0, 3)<<1 | BIT(inst, 5) : BITS(inst, 0, 3) | BIT(inst, 5)<<4);
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMOVR_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		vmovr_inst *inst_cream = (vmovr_inst *)inst_base->component;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
 
-		VMOVR(cpu, inst_cream->single, inst_cream->d, inst_cream->m);
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmovr_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        vmovr_inst *inst_cream = (vmovr_inst *)inst_base->component;
+
+        VMOVR(cpu, inst_cream->single, inst_cream->d, inst_cream->m);
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmovr_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -1266,33 +1249,33 @@ DYNCOM_FILL_ACTION(vmovr),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmovr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
-	if(instr >> 28 != 0xe)
-		*tag |= TAG_CONDITIONAL;
+    int instr_size = INSTR_SIZE;
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+    if(instr >> 28 != 0xe)
+        *tag |= TAG_CONDITIONAL;
 
-	return instr_size;
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmovr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s VMOV \n", __FUNCTION__);
-	int single   = BIT(8) == 0;
-	int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
-	int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
+    DBG("\t\tin %s VMOV \n", __FUNCTION__);
+    int single   = BIT(8) == 0;
+    int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
+    int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
 
-	if (single)
-	{
-		LETFPS(d, FR32(m));
-	}
-	else
-	{
-		/* Check endian please */
-		LETFPS((d*2 + 1), FR32(m*2 + 1));
-		LETFPS((d * 2), FR32(m * 2));
-	}
-	return No_exp;
+    if (single)
+    {
+        LETFPS(d, FR32(m));
+    }
+    else
+    {
+        /* Check endian please */
+        LETFPS((d*2 + 1), FR32(m*2 + 1));
+        LETFPS((d * 2), FR32(m * 2));
+    }
+    return No_exp;
 }
 #endif
 
@@ -1301,52 +1284,50 @@ int DYNCOM_TRANS(vmovr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 1D11 0000 Vd-- 101X 11M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vabs_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vabs_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vabs)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VABS);
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vabs_inst));
-	vabs_inst *inst_cream = (vabs_inst *)inst_base->component;
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vabs_inst));
+    vabs_inst *inst_cream = (vabs_inst *)inst_base->component;
+
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VABS_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VABS :\n");
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VABS :\n");
+
+        vabs_inst *inst_cream = (vabs_inst *)inst_base->component;
 
-		vabs_inst *inst_cream = (vabs_inst *)inst_base->component;
+        int ret;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vabs_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vabs_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -1356,50 +1337,50 @@ DYNCOM_FILL_ACTION(vabs),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vabs)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vabs)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int single   = BIT(8) == 0;
-	int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
-	int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
-	Value* m0;
-	if (single)
-	{
-		m0 =  FR32(m);
-		m0 = SELECT(FPCMP_OLT(m0,FPCONST32(0.0)),FPNEG32(m0),m0);
-		LETFPS(d,m0);
-	}
-	else
-	{
-		/* Check endian please */
-		Value *lo = FR32(2 * m);
-		Value *hi = FR32(2 * m + 1);
-		hi = IBITCAST32(hi);
-		lo = IBITCAST32(lo);
-		Value *hi64 = ZEXT64(hi);
-		Value* lo64 = ZEXT64(lo);
-		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
-		m0 = FPBITCAST64(v64);
-		m0 = SELECT(FPCMP_OLT(m0,FPCONST64(0.0)),FPNEG64(m0),m0);
-		Value *val64 = IBITCAST64(m0);
-		hi = LSHR(val64,CONST64(32));
-		lo = AND(val64,CONST64(0xffffffff));
-		hi = TRUNC32(hi);
-		lo  = TRUNC32(lo);
-		hi = FPBITCAST32(hi);
-		lo = FPBITCAST32(lo);		
-		LETFPS(2*d ,lo);
-		LETFPS(d*2 + 1 , hi);
-	}
-	return No_exp;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int single   = BIT(8) == 0;
+    int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
+    int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
+    Value* m0;
+    if (single)
+    {
+        m0 =  FR32(m);
+        m0 = SELECT(FPCMP_OLT(m0,FPCONST32(0.0)),FPNEG32(m0),m0);
+        LETFPS(d,m0);
+    }
+    else
+    {
+        /* Check endian please */
+        Value *lo = FR32(2 * m);
+        Value *hi = FR32(2 * m + 1);
+        hi = IBITCAST32(hi);
+        lo = IBITCAST32(lo);
+        Value *hi64 = ZEXT64(hi);
+        Value* lo64 = ZEXT64(lo);
+        Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+        m0 = FPBITCAST64(v64);
+        m0 = SELECT(FPCMP_OLT(m0,FPCONST64(0.0)),FPNEG64(m0),m0);
+        Value *val64 = IBITCAST64(m0);
+        hi = LSHR(val64,CONST64(32));
+        lo = AND(val64,CONST64(0xffffffff));
+        hi = TRUNC32(hi);
+        lo  = TRUNC32(lo);
+        hi = FPBITCAST32(hi);
+        lo = FPBITCAST32(lo);        
+        LETFPS(2*d ,lo);
+        LETFPS(d*2 + 1 , hi);
+    }
+    return No_exp;
 }
 #endif
 
@@ -1409,52 +1390,50 @@ int DYNCOM_TRANS(vabs)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vneg_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vneg_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vneg)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VNEG);
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vneg_inst));
-	vneg_inst *inst_cream = (vneg_inst *)inst_base->component;
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vneg_inst));
+    vneg_inst *inst_cream = (vneg_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VNEG_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VNEG :\n");
 
-		DBG("VNEG :\n");
+        vneg_inst *inst_cream = (vneg_inst *)inst_base->component;
 
-		vneg_inst *inst_cream = (vneg_inst *)inst_base->component;
+        int ret;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vneg_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vneg_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -1464,50 +1443,50 @@ DYNCOM_FILL_ACTION(vneg),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vneg)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vneg)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int single   = BIT(8) == 0;
-	int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
-	int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
-	Value* m0;
-	if (single)
-	{
-		m0 =  FR32(m);
-		m0 = FPNEG32(m0);
-		LETFPS(d,m0);
-	}
-	else
-	{
-		/* Check endian please */
-		Value *lo = FR32(2 * m);
-		Value *hi = FR32(2 * m + 1);
-		hi = IBITCAST32(hi);
-		lo = IBITCAST32(lo);
-		Value *hi64 = ZEXT64(hi);
-		Value* lo64 = ZEXT64(lo);
-		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
-		m0 = FPBITCAST64(v64);
-		m0 = FPNEG64(m0);
-		Value *val64 = IBITCAST64(m0);
-		hi = LSHR(val64,CONST64(32));
-		lo = AND(val64,CONST64(0xffffffff));
-		hi = TRUNC32(hi);
-		lo  = TRUNC32(lo);
-		hi = FPBITCAST32(hi);
-		lo = FPBITCAST32(lo);		
-		LETFPS(2*d ,lo);
-		LETFPS(d*2 + 1 , hi);
-	}
-	return No_exp;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int single   = BIT(8) == 0;
+    int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
+    int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
+    Value* m0;
+    if (single)
+    {
+        m0 =  FR32(m);
+        m0 = FPNEG32(m0);
+        LETFPS(d,m0);
+    }
+    else
+    {
+        /* Check endian please */
+        Value *lo = FR32(2 * m);
+        Value *hi = FR32(2 * m + 1);
+        hi = IBITCAST32(hi);
+        lo = IBITCAST32(lo);
+        Value *hi64 = ZEXT64(hi);
+        Value* lo64 = ZEXT64(lo);
+        Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+        m0 = FPBITCAST64(v64);
+        m0 = FPNEG64(m0);
+        Value *val64 = IBITCAST64(m0);
+        hi = LSHR(val64,CONST64(32));
+        lo = AND(val64,CONST64(0xffffffff));
+        hi = TRUNC32(hi);
+        lo  = TRUNC32(lo);
+        hi = FPBITCAST32(hi);
+        lo = FPBITCAST32(lo);        
+        LETFPS(2*d ,lo);
+        LETFPS(d*2 + 1 , hi);
+    }
+    return No_exp;
 }
 #endif
 
@@ -1516,52 +1495,52 @@ int DYNCOM_TRANS(vneg)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 1D11 0001 Vd-- 101X 11M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vsqrt_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vsqrt_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vsqrt)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vsqrt_inst));
-	vsqrt_inst *inst_cream = (vsqrt_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vsqrt_inst));
+    vsqrt_inst *inst_cream = (vsqrt_inst *)inst_base->component;
+
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VSQRT_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VSQRT :\n");
-		
-		vsqrt_inst *inst_cream = (vsqrt_inst *)inst_base->component;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        DBG("VSQRT :\n");
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vsqrt_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        vsqrt_inst *inst_cream = (vsqrt_inst *)inst_base->component;
+
+        int ret;
+
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vsqrt_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -1571,38 +1550,38 @@ DYNCOM_FILL_ACTION(vsqrt),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vsqrt)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vsqrt)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int dp_op = (BIT(8) == 1);
-	int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
-	int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
-	Value* v;
-	Value* tmp;
-	if(dp_op){
-		v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
-		tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
-		v = OR(v,tmp);
-		v = FPSQRT(FPBITCAST64(v));
-		tmp = TRUNC32(LSHR(IBITCAST64(v),CONST64(32)));
-		v = TRUNC32(AND(IBITCAST64(v),CONST64( 0xffffffff)));		
-		LETFPS(2 * d , FPBITCAST32(v));
-		LETFPS(2 * d + 1, FPBITCAST32(tmp));
-	}else {
-		v = FR32(m);
-		v = FPSQRT(FPEXT(64,v));
-		v = FPTRUNC(32,v);
-		LETFPS(d,v);
-	}
-	return No_exp;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int dp_op = (BIT(8) == 1);
+    int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
+    int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+    Value* v;
+    Value* tmp;
+    if(dp_op){
+        v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
+        tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
+        v = OR(v,tmp);
+        v = FPSQRT(FPBITCAST64(v));
+        tmp = TRUNC32(LSHR(IBITCAST64(v),CONST64(32)));
+        v = TRUNC32(AND(IBITCAST64(v),CONST64( 0xffffffff)));        
+        LETFPS(2 * d , FPBITCAST32(v));
+        LETFPS(2 * d + 1, FPBITCAST32(tmp));
+    }else {
+        v = FR32(m);
+        v = FPSQRT(FPEXT(64,v));
+        v = FPTRUNC(32,v);
+        LETFPS(d,v);
+    }
+    return No_exp;
 }
 #endif
 
@@ -1611,52 +1590,52 @@ int DYNCOM_TRANS(vsqrt)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 1D11 0100 Vd-- 101X E1M0 Vm-- Encoding 1 */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vcmp_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vcmp_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vcmp)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcmp_inst));
-	vcmp_inst *inst_cream = (vcmp_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcmp_inst));
+    vcmp_inst *inst_cream = (vcmp_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VCMP_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VCMP(1) :\n");
 
-		DBG("VCMP(1) :\n");
+        vcmp_inst *inst_cream = (vcmp_inst *)inst_base->component;
 
-		vcmp_inst *inst_cream = (vcmp_inst *)inst_base->component;
+        int ret;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vcmp_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vcmp_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -1666,65 +1645,65 @@ DYNCOM_FILL_ACTION(vcmp),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vcmp)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vcmp)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is executed out of JIT.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int dp_op = (BIT(8) == 1);
-	int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
-	int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
-	Value* v;
-	Value* tmp;
-	Value* n;
-	Value* z;
-	Value* c;
-	Value* vt;
-	Value* v1;
-	Value* nzcv;
-	if(dp_op){
-		v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
-		tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
-		v1 = OR(v,tmp);
-		v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32));
-		tmp = ZEXT64(IBITCAST32(FR32(2 * d)));
-		v = OR(v,tmp);
-		z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1));
-		n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1));
-		c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1)); 
-		tmp =  FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1));
-		v1 = tmp;
-		c = OR(c,tmp);
-		n = SHL(ZEXT32(n),CONST32(31));
-		z = SHL(ZEXT32(z),CONST32(30));
-		c = SHL(ZEXT32(c),CONST32(29));
-		v1 = SHL(ZEXT32(v1),CONST(28));
-		nzcv = OR(OR(OR(n,z),c),v1);	
-		v = R(VFP_FPSCR);
-		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
-		LET(VFP_FPSCR,tmp);
-	}else {
-		z = FPCMP_OEQ(FR32(d),FR32(m));
-		n = FPCMP_OLT(FR32(d),FR32(m));
-		c = FPCMP_OGE(FR32(d),FR32(m)); 
-		tmp = FPCMP_UNO(FR32(d),FR32(m));
-		c = OR(c,tmp);
-		v1 = tmp;
-		n = SHL(ZEXT32(n),CONST32(31));
-		z = SHL(ZEXT32(z),CONST32(30));
-		c = SHL(ZEXT32(c),CONST32(29));
-		v1 = SHL(ZEXT32(v1),CONST(28));
-		nzcv = OR(OR(OR(n,z),c),v1);	
-		v = R(VFP_FPSCR);
-		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
-		LET(VFP_FPSCR,tmp);
-	}
-	return No_exp;
+    DBG("\t\tin %s instruction is executed out of JIT.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int dp_op = (BIT(8) == 1);
+    int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
+    int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+    Value* v;
+    Value* tmp;
+    Value* n;
+    Value* z;
+    Value* c;
+    Value* vt;
+    Value* v1;
+    Value* nzcv;
+    if(dp_op){
+        v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
+        tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
+        v1 = OR(v,tmp);
+        v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32));
+        tmp = ZEXT64(IBITCAST32(FR32(2 * d)));
+        v = OR(v,tmp);
+        z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1));
+        n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1));
+        c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1)); 
+        tmp =  FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1));
+        v1 = tmp;
+        c = OR(c,tmp);
+        n = SHL(ZEXT32(n),CONST32(31));
+        z = SHL(ZEXT32(z),CONST32(30));
+        c = SHL(ZEXT32(c),CONST32(29));
+        v1 = SHL(ZEXT32(v1),CONST(28));
+        nzcv = OR(OR(OR(n,z),c),v1);    
+        v = R(VFP_FPSCR);
+        tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+        LET(VFP_FPSCR,tmp);
+    }else {
+        z = FPCMP_OEQ(FR32(d),FR32(m));
+        n = FPCMP_OLT(FR32(d),FR32(m));
+        c = FPCMP_OGE(FR32(d),FR32(m)); 
+        tmp = FPCMP_UNO(FR32(d),FR32(m));
+        c = OR(c,tmp);
+        v1 = tmp;
+        n = SHL(ZEXT32(n),CONST32(31));
+        z = SHL(ZEXT32(z),CONST32(30));
+        c = SHL(ZEXT32(c),CONST32(29));
+        v1 = SHL(ZEXT32(v1),CONST(28));
+        nzcv = OR(OR(OR(n,z),c),v1);    
+        v = R(VFP_FPSCR);
+        tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+        LET(VFP_FPSCR,tmp);
+    }
+    return No_exp;
 }
 #endif
 
@@ -1733,52 +1712,52 @@ int DYNCOM_TRANS(vcmp)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 1D11 0100 Vd-- 101X E1M0 Vm-- Encoding 2 */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vcmp2_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vcmp2_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vcmp2)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcmp2_inst));
-	vcmp2_inst *inst_cream = (vcmp2_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcmp2_inst));
+    vcmp2_inst *inst_cream = (vcmp2_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VCMP2_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VCMP(2) :\n");
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VCMP(2) :\n");
 
-		vcmp2_inst *inst_cream = (vcmp2_inst *)inst_base->component;
+        vcmp2_inst *inst_cream = (vcmp2_inst *)inst_base->component;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        int ret;
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vcmp2_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vcmp2_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -1788,65 +1767,65 @@ DYNCOM_FILL_ACTION(vcmp2),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vcmp2)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vcmp2)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction will executed out of JIT.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int dp_op = (BIT(8) == 1);
-	int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
-	//int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
-	Value* v;
-	Value* tmp;
-	Value* n;
-	Value* z;
-	Value* c;
-	Value* vt;
-	Value* v1;
-	Value* nzcv;
-	if(dp_op){
-		v1 = CONST64(0);
-		v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32));
-		tmp = ZEXT64(IBITCAST32(FR32(2 * d)));
-		v = OR(v,tmp);
-		z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1));
-		n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1));
-		c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1)); 
-		tmp =  FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1));
-		v1 = tmp;
-		c = OR(c,tmp);
-		n = SHL(ZEXT32(n),CONST32(31));
-		z = SHL(ZEXT32(z),CONST32(30));
-		c = SHL(ZEXT32(c),CONST32(29));
-		v1 = SHL(ZEXT32(v1),CONST(28));
-		nzcv = OR(OR(OR(n,z),c),v1);	
-		v = R(VFP_FPSCR);
-		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
-		LET(VFP_FPSCR,tmp);
-	}else {
-		v1 = CONST(0);
-		v1 = FPBITCAST32(v1);
-		z = FPCMP_OEQ(FR32(d),v1);
-		n = FPCMP_OLT(FR32(d),v1);
-		c = FPCMP_OGE(FR32(d),v1); 
-		tmp = FPCMP_UNO(FR32(d),v1);
-		c = OR(c,tmp);
-		v1 = tmp;
-		n = SHL(ZEXT32(n),CONST32(31));
-		z = SHL(ZEXT32(z),CONST32(30));
-		c = SHL(ZEXT32(c),CONST32(29));
-		v1 = SHL(ZEXT32(v1),CONST(28));
-		nzcv = OR(OR(OR(n,z),c),v1);	
-		v = R(VFP_FPSCR);
-		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
-		LET(VFP_FPSCR,tmp);
-	}
-	return No_exp;
+    DBG("\t\tin %s instruction will executed out of JIT.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int dp_op = (BIT(8) == 1);
+    int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
+    //int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+    Value* v;
+    Value* tmp;
+    Value* n;
+    Value* z;
+    Value* c;
+    Value* vt;
+    Value* v1;
+    Value* nzcv;
+    if(dp_op){
+        v1 = CONST64(0);
+        v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32));
+        tmp = ZEXT64(IBITCAST32(FR32(2 * d)));
+        v = OR(v,tmp);
+        z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1));
+        n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1));
+        c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1)); 
+        tmp =  FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1));
+        v1 = tmp;
+        c = OR(c,tmp);
+        n = SHL(ZEXT32(n),CONST32(31));
+        z = SHL(ZEXT32(z),CONST32(30));
+        c = SHL(ZEXT32(c),CONST32(29));
+        v1 = SHL(ZEXT32(v1),CONST(28));
+        nzcv = OR(OR(OR(n,z),c),v1);    
+        v = R(VFP_FPSCR);
+        tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+        LET(VFP_FPSCR,tmp);
+    }else {
+        v1 = CONST(0);
+        v1 = FPBITCAST32(v1);
+        z = FPCMP_OEQ(FR32(d),v1);
+        n = FPCMP_OLT(FR32(d),v1);
+        c = FPCMP_OGE(FR32(d),v1); 
+        tmp = FPCMP_UNO(FR32(d),v1);
+        c = OR(c,tmp);
+        v1 = tmp;
+        n = SHL(ZEXT32(n),CONST32(31));
+        z = SHL(ZEXT32(z),CONST32(30));
+        c = SHL(ZEXT32(c),CONST32(29));
+        v1 = SHL(ZEXT32(v1),CONST(28));
+        nzcv = OR(OR(OR(n,z),c),v1);    
+        v = R(VFP_FPSCR);
+        tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+        LET(VFP_FPSCR,tmp);
+    }
+    return No_exp;
 }
 #endif
 
@@ -1855,52 +1834,52 @@ int DYNCOM_TRANS(vcmp2)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 1D11 0111 Vd-- 101X 11M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vcvtbds_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vcvtbds_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vcvtbds)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcvtbds_inst));
-	vcvtbds_inst *inst_cream = (vcvtbds_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcvtbds_inst));
+    vcvtbds_inst *inst_cream = (vcvtbds_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VCVTBDS_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VCVT(BDS) :\n");
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VCVT(BDS) :\n");
 
-		vcvtbds_inst *inst_cream = (vcvtbds_inst *)inst_base->component;
+        vcvtbds_inst *inst_cream = (vcvtbds_inst *)inst_base->component;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        int ret;
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vcvtbds_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vcvtbds_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -1910,39 +1889,39 @@ DYNCOM_FILL_ACTION(vcvtbds),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vcvtbds)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vcvtbds)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is executed out.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int dp_op = (BIT(8) == 1);
-	int d = dp_op ? BITS(12,15) << 1 | BIT(22) : BIT(22) << 4 | BITS(12,15);
-	int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
-	int d2s = dp_op;
-	Value* v;
-	Value* tmp;
-	Value* v1;
-	if(d2s){
-		v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
-		tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
-		v1 = OR(v,tmp);
-		tmp = FPTRUNC(32,FPBITCAST64(v1));
-		LETFPS(d,tmp);	
-	}else {
-		v = FR32(m);
-		tmp = FPEXT(64,v);
-		v = IBITCAST64(tmp);
-		tmp = TRUNC32(AND(v,CONST64(0xffffffff)));
-		v1 = TRUNC32(LSHR(v,CONST64(32)));
-		LETFPS(2 * d, FPBITCAST32(tmp) );
-		LETFPS(2 * d + 1, FPBITCAST32(v1));
-	}
-	return No_exp;
+    DBG("\t\tin %s instruction is executed out.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int dp_op = (BIT(8) == 1);
+    int d = dp_op ? BITS(12,15) << 1 | BIT(22) : BIT(22) << 4 | BITS(12,15);
+    int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+    int d2s = dp_op;
+    Value* v;
+    Value* tmp;
+    Value* v1;
+    if(d2s){
+        v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
+        tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
+        v1 = OR(v,tmp);
+        tmp = FPTRUNC(32,FPBITCAST64(v1));
+        LETFPS(d,tmp);    
+    }else {
+        v = FR32(m);
+        tmp = FPEXT(64,v);
+        v = IBITCAST64(tmp);
+        tmp = TRUNC32(AND(v,CONST64(0xffffffff)));
+        v1 = TRUNC32(LSHR(v,CONST64(32)));
+        LETFPS(2 * d, FPBITCAST32(tmp) );
+        LETFPS(2 * d + 1, FPBITCAST32(v1));
+    }
+    return No_exp;
 }
 #endif
 
@@ -1951,52 +1930,52 @@ int DYNCOM_TRANS(vcvtbds)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc)
 /* cond 1110 1D11 1op2 Vd-- 101X X1M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vcvtbff_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vcvtbff_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vcvtbff)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VCVTBFF);
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcvtbff_inst));
-	vcvtbff_inst *inst_cream = (vcvtbff_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VCVTBFF);
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcvtbff_inst));
+    vcvtbff_inst *inst_cream = (vcvtbff_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
-	
-	return inst_base;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VCVTBFF_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VCVT(BFF) :\n");
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VCVT(BFF) :\n");
 
-		vcvtbff_inst *inst_cream = (vcvtbff_inst *)inst_base->component;
+        vcvtbff_inst *inst_cream = (vcvtbff_inst *)inst_base->component;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        int ret;
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vcvtbff_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vcvtbff_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -2006,17 +1985,17 @@ DYNCOM_FILL_ACTION(vcvtbff),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vcvtbff)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vcvtbff)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	arch_arm_undef(cpu, bb, instr);
-	return No_exp;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    arch_arm_undef(cpu, bb, instr);
+    return No_exp;
 }
 #endif
 
@@ -2025,53 +2004,53 @@ int DYNCOM_TRANS(vcvtbff)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc)
 /* cond 1110 1D11 1op2 Vd-- 101X X1M0 Vm-- */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vcvtbfi_inst {
-	unsigned int instr;
-	unsigned int dp_operation;
+    unsigned int instr;
+    unsigned int dp_operation;
 } vcvtbfi_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vcvtbfi)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcvtbfi_inst));
-	vcvtbfi_inst *inst_cream = (vcvtbfi_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vcvtbfi_inst));
+    vcvtbfi_inst *inst_cream = (vcvtbfi_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->dp_operation = BIT(inst, 8);
-	inst_cream->instr = inst;
+    inst_cream->dp_operation = BIT(inst, 8);
+    inst_cream->instr = inst;
 
-	
-	return inst_base;
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VCVTBFI_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		DBG("VCVT(BFI) :\n");
-		
-		vcvtbfi_inst *inst_cream = (vcvtbfi_inst *)inst_base->component;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        DBG("VCVT(BFI) :\n");
+
+        vcvtbfi_inst *inst_cream = (vcvtbfi_inst *)inst_base->component;
 
-		int ret;
-		
-		if (inst_cream->dp_operation)
-			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-		else
-			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        int ret;
 
-		CHECK_VFP_CDP_RET;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vcvtbfi_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        if (inst_cream->dp_operation)
+            ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+        else
+            ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+        CHECK_VFP_CDP_RET;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vcvtbfi_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -2081,87 +2060,87 @@ DYNCOM_FILL_ACTION(vcvtbfi),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vcvtbfi)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vcvtbfi)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	unsigned int opc2 = BITS(16,18);
-	int to_integer = ((opc2 >> 2) == 1);	
-	int dp_op =  (BIT(8) == 1);
-	unsigned int op = BIT(7);
-	int m,d;
-	Value* v;
-	Value* hi;
-	Value* lo;
-	Value* v64; 
-	if(to_integer){
-		d = BIT(22) | (BITS(12,15) << 1);
-		if(dp_op)
-			m = BITS(0,3) | BIT(5) << 4;
-		else
-			m = BIT(5) | BITS(0,3) << 1;
-	}else {
-		m = BIT(5) | BITS(0,3) << 1;
-		if(dp_op)
-			d = BITS(12,15) | BIT(22) << 4;
- 		else
-			d  = BIT(22) | BITS(12,15) << 1;		
-	}
-	if(to_integer){
-		if(dp_op){
-			lo = FR32(m * 2);
-		        hi = FR32(m * 2 + 1);	
-			hi = ZEXT64(IBITCAST32(hi));
-			lo = ZEXT64(IBITCAST32(lo));
-			v64 = OR(SHL(hi,CONST64(32)),lo);	
-			if(BIT(16)){
-				v = FPTOSI(32,FPBITCAST64(v64));
-			}
-			else
-				v = FPTOUI(32,FPBITCAST64(v64));
-				
-				v = FPBITCAST32(v);
-				LETFPS(d,v);
-		}else {
-			v = FR32(m);
-			if(BIT(16)){
-				
-				v = FPTOSI(32,v);
-			}
-			else
-				v = FPTOUI(32,v);
-				LETFPS(d,FPBITCAST32(v));
-		}
-	}else {
-		if(dp_op){	
-			v = IBITCAST32(FR32(m));
-			if(BIT(7))
-				v64 = SITOFP(64,v); 
-			else
-				v64 = UITOFP(64,v);
-			v = IBITCAST64(v64);
-			hi = FPBITCAST32(TRUNC32(LSHR(v,CONST64(32))));
-			lo = FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff))));
-			LETFPS(2 * d , lo);
-			LETFPS(2 * d + 1, hi);
-		}else {
-			v = IBITCAST32(FR32(m));
-			if(BIT(7))
-				v = SITOFP(32,v);
-			else
-				v = UITOFP(32,v);
-				LETFPS(d,v);
-		}
-	}
-	return No_exp;
+    DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    unsigned int opc2 = BITS(16,18);
+    int to_integer = ((opc2 >> 2) == 1);    
+    int dp_op =  (BIT(8) == 1);
+    unsigned int op = BIT(7);
+    int m,d;
+    Value* v;
+    Value* hi;
+    Value* lo;
+    Value* v64; 
+    if(to_integer){
+        d = BIT(22) | (BITS(12,15) << 1);
+        if(dp_op)
+            m = BITS(0,3) | BIT(5) << 4;
+        else
+            m = BIT(5) | BITS(0,3) << 1;
+    }else {
+        m = BIT(5) | BITS(0,3) << 1;
+        if(dp_op)
+            d = BITS(12,15) | BIT(22) << 4;
+        else
+            d  = BIT(22) | BITS(12,15) << 1;        
+    }
+    if(to_integer){
+        if(dp_op){
+            lo = FR32(m * 2);
+            hi = FR32(m * 2 + 1);    
+            hi = ZEXT64(IBITCAST32(hi));
+            lo = ZEXT64(IBITCAST32(lo));
+            v64 = OR(SHL(hi,CONST64(32)),lo);    
+            if(BIT(16)){
+                v = FPTOSI(32,FPBITCAST64(v64));
+            }
+            else
+                v = FPTOUI(32,FPBITCAST64(v64));
+
+            v = FPBITCAST32(v);
+            LETFPS(d,v);
+        }else {
+            v = FR32(m);
+            if(BIT(16)){
+
+                v = FPTOSI(32,v);
+            }
+            else
+                v = FPTOUI(32,v);
+            LETFPS(d,FPBITCAST32(v));
+        }
+    }else {
+        if(dp_op){    
+            v = IBITCAST32(FR32(m));
+            if(BIT(7))
+                v64 = SITOFP(64,v); 
+            else
+                v64 = UITOFP(64,v);
+            v = IBITCAST64(v64);
+            hi = FPBITCAST32(TRUNC32(LSHR(v,CONST64(32))));
+            lo = FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff))));
+            LETFPS(2 * d , lo);
+            LETFPS(2 * d + 1, hi);
+        }else {
+            v = IBITCAST32(FR32(m));
+            if(BIT(7))
+                v = SITOFP(32,v);
+            else
+                v = UITOFP(32,v);
+            LETFPS(d,v);
+        }
+    }
+    return No_exp;
 }
 
 /**
@@ -2173,15 +2152,15 @@ int DYNCOM_TRANS(vcvtbfi)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc)
 * @return 
 */
 int vcvtbfi_instr_impl(arm_core_t* cpu, uint32 instr){
-	int dp_operation = BIT(8);
-	int ret;
-	if (dp_operation)
-		ret = vfp_double_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-	else
-		ret = vfp_single_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+    int dp_operation = BIT(8);
+    int ret;
+    if (dp_operation)
+        ret = vfp_double_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+    else
+        ret = vfp_single_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
 
-	vfp_raise_exceptions(cpu, ret, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-	return 0;
+    vfp_raise_exceptions(cpu, ret, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+    return 0;
 }
 #endif
 
@@ -2196,45 +2175,45 @@ int vcvtbfi_instr_impl(arm_core_t* cpu, uint32 instr){
 /* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmovbrs_inst {
-	unsigned int to_arm;
-	unsigned int t;
-	unsigned int n;
+    unsigned int to_arm;
+    unsigned int t;
+    unsigned int n;
 } vmovbrs_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbrs)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrs_inst));
-	vmovbrs_inst *inst_cream = (vmovbrs_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrs_inst));
+    vmovbrs_inst *inst_cream = (vmovbrs_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx     = index;
-	inst_base->br     = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->to_arm   = BIT(inst, 20) == 1;
-	inst_cream->t        = BITS(inst, 12, 15);
-	inst_cream->n        = BIT(inst, 7) | BITS(inst, 16, 19)<<1;
+    inst_cream->to_arm   = BIT(inst, 20) == 1;
+    inst_cream->t        = BITS(inst, 12, 15);
+    inst_cream->n        = BIT(inst, 7) | BITS(inst, 16, 19)<<1;
 
-	return inst_base;
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMOVBRS_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
 
-		vmovbrs_inst *inst_cream = (vmovbrs_inst *)inst_base->component;
+        vmovbrs_inst *inst_cream = (vmovbrs_inst *)inst_base->component;
 
-		VMOVBRS(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->n, &(cpu->Reg[inst_cream->t]));
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmovbrs_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        VMOVBRS(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->n, &(cpu->Reg[inst_cream->t]));
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmovbrs_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -2244,30 +2223,30 @@ DYNCOM_FILL_ACTION(vmovbrs),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmovbrs)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmovbrs)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("VMOV(BRS) :\n");
-	int to_arm   = BIT(20) == 1;
-	int t        = BITS(12, 15);
-	int n        = BIT(7) | BITS(16, 19)<<1;
+    DBG("VMOV(BRS) :\n");
+    int to_arm   = BIT(20) == 1;
+    int t        = BITS(12, 15);
+    int n        = BIT(7) | BITS(16, 19)<<1;
 
-	if (to_arm)
-	{
-		DBG("\tr%d <= s%d\n", t, n);
-		LET(t, IBITCAST32(FR32(n)));
-	}
-	else
-	{
-		DBG("\ts%d <= r%d\n", n, t);
-		LETFPS(n, FPBITCAST32(R(t)));
-	}
-	return No_exp;
+    if (to_arm)
+    {
+        DBG("\tr%d <= s%d\n", t, n);
+        LET(t, IBITCAST32(FR32(n)));
+    }
+    else
+    {
+        DBG("\ts%d <= r%d\n", n, t);
+        LETFPS(n, FPBITCAST32(R(t)));
+    }
+    return No_exp;
 }
 #endif
 
@@ -2277,46 +2256,46 @@ int DYNCOM_TRANS(vmovbrs)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc)
 /* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmsr_inst {
-	unsigned int reg;
-	unsigned int Rd;
+    unsigned int reg;
+    unsigned int Rd;
 } vmsr_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmsr)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmsr_inst));
-	vmsr_inst *inst_cream = (vmsr_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmsr_inst));
+    vmsr_inst *inst_cream = (vmsr_inst *)inst_base->component;
+
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx     = index;
-	inst_base->br     = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_cream->reg  = BITS(inst, 16, 19);
+    inst_cream->Rd   = BITS(inst, 12, 15);
 
-	inst_cream->reg  = BITS(inst, 16, 19);
-	inst_cream->Rd   = BITS(inst, 12, 15);
-   
-	return inst_base;
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMSR_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		/* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled ,
-		   and in privilegied mode */
-		/* Exceptions must be checked, according to v7 ref manual */
-		CHECK_VFP_ENABLED;
-           
-		vmsr_inst *inst_cream = (vmsr_inst *)inst_base->component;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        /* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled,
+           and in privileged mode */
+        /* Exceptions must be checked, according to v7 ref manual */
+        CHECK_VFP_ENABLED;
 
-		VMSR(cpu, inst_cream->reg, inst_cream->Rd);
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmsr_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        vmsr_inst *inst_cream = (vmsr_inst *)inst_base->component;
+
+        VMSR(cpu, inst_cream->reg, inst_cream->Rd);
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmsr_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -2326,45 +2305,45 @@ DYNCOM_FILL_ACTION(vmsr),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmsr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmsr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	DBG("VMSR :");
-	if(RD == 15) {
-		printf("in %s is not implementation.\n", __FUNCTION__);
-		exit(-1);
-	}
-	
-	Value *data = NULL;
-	int reg = RN;
-	int Rt   = RD;
-	if (reg == 1)
-	{
-		LET(VFP_FPSCR, R(Rt));
-		DBG("\tflags <= fpscr\n");
-	}
-	else
-	{
-		switch (reg)
-		{
-		case 8:
-			LET(VFP_FPEXC, R(Rt));
-			DBG("\tfpexc <= r%d \n", Rt);
-			break;
-		default:
-			DBG("\tSUBARCHITECTURE DEFINED\n");
-			break;
-		}
-	}
-	return No_exp;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    DBG("VMSR :");
+    if(RD == 15) {
+        printf("in %s is not implementation.\n", __FUNCTION__);
+        exit(-1);
+    }
+
+    Value *data = NULL;
+    int reg = RN;
+    int Rt   = RD;
+    if (reg == 1)
+    {
+        LET(VFP_FPSCR, R(Rt));
+        DBG("\tflags <= fpscr\n");
+    }
+    else
+    {
+        switch (reg)
+        {
+        case 8:
+            LET(VFP_FPEXC, R(Rt));
+            DBG("\tfpexc <= r%d \n", Rt);
+            break;
+        default:
+            DBG("\tSUBARCHITECTURE DEFINED\n");
+            break;
+        }
+    }
+    return No_exp;
 }
 #endif
 
@@ -2374,48 +2353,48 @@ int DYNCOM_TRANS(vmsr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmovbrc_inst {
-	unsigned int esize;
-	unsigned int index;
-	unsigned int d;
-	unsigned int t;
+    unsigned int esize;
+    unsigned int index;
+    unsigned int d;
+    unsigned int t;
 } vmovbrc_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbrc)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrc_inst));
-	vmovbrc_inst *inst_cream = (vmovbrc_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrc_inst));
+    vmovbrc_inst *inst_cream = (vmovbrc_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx     = index;
-	inst_base->br     = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->d     = BITS(inst, 16, 19)|BIT(inst, 7)<<4;
-	inst_cream->t     = BITS(inst, 12, 15);
-	/* VFP variant of instruction */
-	inst_cream->esize = 32;
-	inst_cream->index = BIT(inst, 21);
-   
-	return inst_base;
+    inst_cream->d     = BITS(inst, 16, 19)|BIT(inst, 7)<<4;
+    inst_cream->t     = BITS(inst, 12, 15);
+    /* VFP variant of instruction */
+    inst_cream->esize = 32;
+    inst_cream->index = BIT(inst, 21);
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMOVBRC_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		vmovbrc_inst *inst_cream = (vmovbrc_inst *)inst_base->component;
-		
-		VFP_DEBUG_UNIMPLEMENTED(VMOVBRC);
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmovbrc_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vmovbrc_inst *inst_cream = (vmovbrc_inst *)inst_base->component;
+
+        VFP_DEBUG_UNIMPLEMENTED(VMOVBRC);
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmovbrc_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -2425,17 +2404,17 @@ DYNCOM_FILL_ACTION(vmovbrc),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmovbrc)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmovbrc)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	arch_arm_undef(cpu, bb, instr);
-	return No_exp;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    arch_arm_undef(cpu, bb, instr);
+    return No_exp;
 }
 #endif
 
@@ -2445,88 +2424,88 @@ int DYNCOM_TRANS(vmovbrc)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc)
 /* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmrs_inst {
-	unsigned int reg;
-	unsigned int Rt;
+    unsigned int reg;
+    unsigned int Rt;
 } vmrs_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmrs)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmrs_inst));
-	vmrs_inst *inst_cream = (vmrs_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmrs_inst));
+    vmrs_inst *inst_cream = (vmrs_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx     = index;
-	inst_base->br     = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->reg  = BITS(inst, 16, 19);
-	inst_cream->Rt	 = BITS(inst, 12, 15);
+    inst_cream->reg  = BITS(inst, 16, 19);
+    inst_cream->Rt     = BITS(inst, 12, 15);
 
-	return inst_base;
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMRS_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		/* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled,
-		   and in privilegied mode */
-		/* Exceptions must be checked, according to v7 ref manual */
-		CHECK_VFP_ENABLED;
-		
-		vmrs_inst *inst_cream = (vmrs_inst *)inst_base->component;
-		
-		DBG("VMRS :");
-	
-		if (inst_cream->reg == 1) /* FPSCR */
-		{
-			if (inst_cream->Rt != 15)
-			{	
-				cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSCR)];
-				DBG("\tr%d <= fpscr[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
-			}
-			else
-			{	
-				cpu->NFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 31) & 1;
-				cpu->ZFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 30) & 1;
-				cpu->CFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 29) & 1;
-				cpu->VFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 28) & 1;
-				DBG("\tflags <= fpscr[%1xxxxxxxx]\n", cpu->VFP[VFP_OFFSET(VFP_FPSCR)]>>28);
-			}
-		} 
-		else
-		{
-			switch (inst_cream->reg)
-			{
-			case 0:
-				cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSID)];
-				DBG("\tr%d <= fpsid[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSID)]);
-				break;
-			case 6:
-				/* MVFR1, VFPv3 only ? */
-				DBG("\tr%d <= MVFR1 unimplemented\n", inst_cream->Rt);
-				break;
-			case 7:
-				/* MVFR0, VFPv3 only? */
-				DBG("\tr%d <= MVFR0 unimplemented\n", inst_cream->Rt);
-				break;
-			case 8:
-				cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPEXC)];
-				DBG("\tr%d <= fpexc[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPEXC)]);
-				break;
-			default:
-				DBG("\tSUBARCHITECTURE DEFINED\n");
-				break;
-			}
-		}
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmrs_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        /* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled,
+           and in privileged mode */
+        /* Exceptions must be checked, according to v7 ref manual */
+        CHECK_VFP_ENABLED;
+
+        vmrs_inst *inst_cream = (vmrs_inst *)inst_base->component;
+
+        DBG("VMRS :");
+
+        if (inst_cream->reg == 1) /* FPSCR */
+        {
+            if (inst_cream->Rt != 15)
+            {    
+                cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSCR)];
+                DBG("\tr%d <= fpscr[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+            }
+            else
+            {    
+                cpu->NFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 31) & 1;
+                cpu->ZFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 30) & 1;
+                cpu->CFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 29) & 1;
+                cpu->VFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 28) & 1;
+                DBG("\tflags <= fpscr[%1xxxxxxxx]\n", cpu->VFP[VFP_OFFSET(VFP_FPSCR)]>>28);
+            }
+        } 
+        else
+        {
+            switch (inst_cream->reg)
+            {
+            case 0:
+                cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSID)];
+                DBG("\tr%d <= fpsid[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSID)]);
+                break;
+            case 6:
+                /* MVFR1, VFPv3 only ? */
+                DBG("\tr%d <= MVFR1 unimplemented\n", inst_cream->Rt);
+                break;
+            case 7:
+                /* MVFR0, VFPv3 only? */
+                DBG("\tr%d <= MVFR0 unimplemented\n", inst_cream->Rt);
+                break;
+            case 8:
+                cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPEXC)];
+                DBG("\tr%d <= fpexc[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPEXC)]);
+                break;
+            default:
+                DBG("\tSUBARCHITECTURE DEFINED\n");
+                break;
+            }
+        }
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmrs_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -2536,64 +2515,64 @@ DYNCOM_FILL_ACTION(vmrs),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmrs)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	DBG("\t\tin %s .\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    DBG("\t\tin %s .\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmrs)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	
-	Value *data = NULL;
-	int reg = BITS(16, 19);;
-	int Rt   = BITS(12, 15);
-	DBG("VMRS : reg=%d, Rt=%d\n", reg, Rt);
-	if (reg == 1)
-	{
-		if (Rt != 15)
-		{
-			LET(Rt, R(VFP_FPSCR));
-			DBG("\tr%d <= fpscr\n", Rt);
-		}
-		else
-		{
-			//LET(Rt, R(VFP_FPSCR));
-			update_cond_from_fpscr(cpu, instr, bb, pc);
-			DBG("In %s, \tflags <= fpscr\n", __FUNCTION__);
-		}
-	}
-	else
-	{
-		switch (reg)
-		{
-		case 0:
-			LET(Rt, R(VFP_FPSID));
-			DBG("\tr%d <= fpsid\n", Rt);
-			break;
-		case 6:
-			/* MVFR1, VFPv3 only ? */
-			DBG("\tr%d <= MVFR1 unimplemented\n", Rt);
-			break;
-		case 7:
-			/* MVFR0, VFPv3 only? */
-			DBG("\tr%d <= MVFR0 unimplemented\n", Rt);
-			break;
-		case 8:
-			LET(Rt, R(VFP_FPEXC));
-			DBG("\tr%d <= fpexc\n", Rt);
-			break;
-		default:
-			DBG("\tSUBARCHITECTURE DEFINED\n");
-			break;
-		}
-	}
-
-	return No_exp;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+
+    Value *data = NULL;
+    int reg = BITS(16, 19);;
+    int Rt   = BITS(12, 15);
+    DBG("VMRS : reg=%d, Rt=%d\n", reg, Rt);
+    if (reg == 1)
+    {
+        if (Rt != 15)
+        {
+            LET(Rt, R(VFP_FPSCR));
+            DBG("\tr%d <= fpscr\n", Rt);
+        }
+        else
+        {
+            //LET(Rt, R(VFP_FPSCR));
+            update_cond_from_fpscr(cpu, instr, bb, pc);
+            DBG("In %s, \tflags <= fpscr\n", __FUNCTION__);
+        }
+    }
+    else
+    {
+        switch (reg)
+        {
+        case 0:
+            LET(Rt, R(VFP_FPSID));
+            DBG("\tr%d <= fpsid\n", Rt);
+            break;
+        case 6:
+            /* MVFR1, VFPv3 only ? */
+            DBG("\tr%d <= MVFR1 unimplemented\n", Rt);
+            break;
+        case 7:
+            /* MVFR0, VFPv3 only? */
+            DBG("\tr%d <= MVFR0 unimplemented\n", Rt);
+            break;
+        case 8:
+            LET(Rt, R(VFP_FPEXC));
+            DBG("\tr%d <= fpexc\n", Rt);
+            break;
+        default:
+            DBG("\tSUBARCHITECTURE DEFINED\n");
+            break;
+        }
+    }
+
+    return No_exp;
 }
 #endif
 
@@ -2603,48 +2582,48 @@ int DYNCOM_TRANS(vmrs)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1110 op11 CRn- Rt-- copr op21 CRm- MCR */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmovbcr_inst {
-	unsigned int esize;
-	unsigned int index;
-	unsigned int d;
-	unsigned int t;
+    unsigned int esize;
+    unsigned int index;
+    unsigned int d;
+    unsigned int t;
 } vmovbcr_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbcr)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbcr_inst));
-	vmovbcr_inst *inst_cream = (vmovbcr_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbcr_inst));
+    vmovbcr_inst *inst_cream = (vmovbcr_inst *)inst_base->component;
+
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx     = index;
-	inst_base->br     = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_cream->d     = BITS(inst, 16, 19)|BIT(inst, 7)<<4;
+    inst_cream->t     = BITS(inst, 12, 15);
+    /* VFP variant of instruction */
+    inst_cream->esize = 32;
+    inst_cream->index = BIT(inst, 21);
 
-	inst_cream->d     = BITS(inst, 16, 19)|BIT(inst, 7)<<4;
-	inst_cream->t     = BITS(inst, 12, 15);
-	/* VFP variant of instruction */
-	inst_cream->esize = 32;
-	inst_cream->index = BIT(inst, 21);
-   
-	return inst_base;
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMOVBCR_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		vmovbcr_inst *inst_cream = (vmovbcr_inst *)inst_base->component;
-		
-		VFP_DEBUG_UNIMPLEMENTED(VMOVBCR);
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmovbcr_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vmovbcr_inst *inst_cream = (vmovbcr_inst *)inst_base->component;
+
+        VFP_DEBUG_UNIMPLEMENTED(VMOVBCR);
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmovbcr_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -2654,17 +2633,17 @@ DYNCOM_FILL_ACTION(vmovbcr),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmovbcr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmovbcr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	arch_arm_undef(cpu, bb, instr);
-	return No_exp;
+    DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    arch_arm_undef(cpu, bb, instr);
+    return No_exp;
 }
 #endif
 
@@ -2679,48 +2658,48 @@ int DYNCOM_TRANS(vmovbcr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc)
 /* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmovbrrss_inst {
-	unsigned int to_arm;
-	unsigned int t;
-	unsigned int t2;
-	unsigned int m;
+    unsigned int to_arm;
+    unsigned int t;
+    unsigned int t2;
+    unsigned int m;
 } vmovbrrss_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbrrss)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrrss_inst));
-	vmovbrrss_inst *inst_cream = (vmovbrrss_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrrss_inst));
+    vmovbrrss_inst *inst_cream = (vmovbrrss_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx     = index;
-	inst_base->br     = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->to_arm     = BIT(inst, 20) == 1;
-	inst_cream->t          = BITS(inst, 12, 15);
-	inst_cream->t2         = BITS(inst, 16, 19);
-	inst_cream->m          = BITS(inst, 0, 3)<<1|BIT(inst, 5);
+    inst_cream->to_arm     = BIT(inst, 20) == 1;
+    inst_cream->t          = BITS(inst, 12, 15);
+    inst_cream->t2         = BITS(inst, 16, 19);
+    inst_cream->m          = BITS(inst, 0, 3)<<1|BIT(inst, 5);
 
-	return inst_base;
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMOVBRRSS_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
 
-		vmovbrrss_inst* const inst_cream = (vmovbrrss_inst*)inst_base->component;
+        vmovbrrss_inst* const inst_cream = (vmovbrrss_inst*)inst_base->component;
 
-		VMOVBRRSS(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->t2, inst_cream->m,
-		         &cpu->Reg[inst_cream->t], &cpu->Reg[inst_cream->t2]);
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmovbrrss_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+        VMOVBRRSS(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->t2, inst_cream->m,
+            &cpu->Reg[inst_cream->t], &cpu->Reg[inst_cream->t2]);
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmovbrrss_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 #ifdef VFP_DYNCOM_TABLE
@@ -2729,31 +2708,31 @@ DYNCOM_FILL_ACTION(vmovbrrss),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmovbrrss)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
+    int instr_size = INSTR_SIZE;
 
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	if (instr >> 28 != 0xE)
-		*tag |= TAG_CONDITIONAL;
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    if (instr >> 28 != 0xE)
+        *tag |= TAG_CONDITIONAL;
 
-	return instr_size;
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmovbrrss)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc)
 {
-	int to_arm   = BIT(20) == 1;
-	int t        = BITS(12, 15);
-	int t2       = BITS(16, 19);
-	int n        = BIT(5)<<4 | BITS(0, 3);
-	if (to_arm) {
-		LET(t, IBITCAST32(FR32(n + 0)));
-		LET(t2, IBITCAST32(FR32(n + 1)));
-	}
-	else {
-		LETFPS(n + 0, FPBITCAST32(R(t)));
-		LETFPS(n + 1, FPBITCAST32(R(t2)));
-	}
-	return No_exp;
+    int to_arm   = BIT(20) == 1;
+    int t        = BITS(12, 15);
+    int t2       = BITS(16, 19);
+    int n        = BIT(5)<<4 | BITS(0, 3);
+    if (to_arm) {
+        LET(t, IBITCAST32(FR32(n + 0)));
+        LET(t2, IBITCAST32(FR32(n + 1)));
+    }
+    else {
+        LETFPS(n + 0, FPBITCAST32(R(t)));
+        LETFPS(n + 1, FPBITCAST32(R(t2)));
+    }
+    return No_exp;
 }
 #endif
 
@@ -2763,48 +2742,48 @@ int DYNCOM_TRANS(vmovbrrss)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t p
 /* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vmovbrrd_inst {
-	unsigned int to_arm;
-	unsigned int t;
-	unsigned int t2;
-	unsigned int m;
+    unsigned int to_arm;
+    unsigned int t;
+    unsigned int t2;
+    unsigned int m;
 } vmovbrrd_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbrrd)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrrd_inst));
-	vmovbrrd_inst *inst_cream = (vmovbrrd_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx     = index;
-	inst_base->br     = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vmovbrrd_inst));
+    vmovbrrd_inst *inst_cream = (vmovbrrd_inst *)inst_base->component;
 
-	inst_cream->to_arm   = BIT(inst, 20) == 1;
-	inst_cream->t        = BITS(inst, 12, 15);
-	inst_cream->t2       = BITS(inst, 16, 19);
-	inst_cream->m        = BIT(inst, 5)<<4 | BITS(inst, 0, 3);
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	return inst_base;
+    inst_cream->to_arm   = BIT(inst, 20) == 1;
+    inst_cream->t        = BITS(inst, 12, 15);
+    inst_cream->t2       = BITS(inst, 16, 19);
+    inst_cream->m        = BIT(inst, 5)<<4 | BITS(inst, 0, 3);
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VMOVBRRD_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		vmovbrrd_inst *inst_cream = (vmovbrrd_inst *)inst_base->component;
-		
-		VMOVBRRD(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->t2, inst_cream->m, 
-				&(cpu->Reg[inst_cream->t]), &(cpu->Reg[inst_cream->t2]));
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vmovbrrd_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vmovbrrd_inst *inst_cream = (vmovbrrd_inst *)inst_base->component;
+
+        VMOVBRRD(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->t2, inst_cream->m, 
+            &(cpu->Reg[inst_cream->t]), &(cpu->Reg[inst_cream->t2]));
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vmovbrrd_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -2814,31 +2793,31 @@ DYNCOM_FILL_ACTION(vmovbrrd),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vmovbrrd)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	if(instr >> 28 != 0xe)
-		*tag |= TAG_CONDITIONAL;
-	return instr_size;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    if(instr >> 28 != 0xe)
+        *tag |= TAG_CONDITIONAL;
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vmovbrrd)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int to_arm   = BIT(20) == 1;
-	int t        = BITS(12, 15);
-	int t2       = BITS(16, 19);
-	int n        = BIT(5)<<4 | BITS(0, 3);
-	if(to_arm){
-		LET(t, IBITCAST32(FR32(n * 2)));
-		LET(t2, IBITCAST32(FR32(n * 2 + 1)));
-	}
-	else{
-		LETFPS(n * 2, FPBITCAST32(R(t)));
-		LETFPS(n * 2 + 1, FPBITCAST32(R(t2)));
-	}
-	return No_exp;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int to_arm   = BIT(20) == 1;
+    int t        = BITS(12, 15);
+    int t2       = BITS(16, 19);
+    int n        = BIT(5)<<4 | BITS(0, 3);
+    if(to_arm){
+        LET(t, IBITCAST32(FR32(n * 2)));
+        LET(t2, IBITCAST32(FR32(n * 2 + 1)));
+    }
+    else{
+        LETFPS(n * 2, FPBITCAST32(R(t)));
+        LETFPS(n * 2 + 1, FPBITCAST32(R(t2)));
+    }
+    return No_exp;
 }
 #endif
 
@@ -2852,60 +2831,60 @@ int DYNCOM_TRANS(vmovbrrd)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc
 /* cond 1101 UD00 Rn-- Vd-- 101X imm8 imm8 */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vstr_inst {
-	unsigned int single;
-	unsigned int n;
-	unsigned int d;
-	unsigned int imm32;
-	unsigned int add;
+    unsigned int single;
+    unsigned int n;
+    unsigned int d;
+    unsigned int imm32;
+    unsigned int add;
 } vstr_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vstr)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vstr_inst));
-	vstr_inst *inst_cream = (vstr_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vstr_inst));
+    vstr_inst *inst_cream = (vstr_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
-	
-	inst_cream->single = BIT(inst, 8) == 0;
-	inst_cream->add	   = BIT(inst, 23);
-	inst_cream->imm32  = BITS(inst, 0,7) << 2;
-	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
-	inst_cream->n	   = BITS(inst, 16, 19);
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	return inst_base;
+    inst_cream->single = BIT(inst, 8) == 0;
+    inst_cream->add       = BIT(inst, 23);
+    inst_cream->imm32  = BITS(inst, 0,7) << 2;
+    inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+    inst_cream->n       = BITS(inst, 16, 19);
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VSTR_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		vstr_inst *inst_cream = (vstr_inst *)inst_base->component;
-		
-		unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]);
-		addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32);
-
-		if (inst_cream->single)
-		{
-			Memory::Write32(addr, cpu->ExtReg[inst_cream->d]);
-		}
-		else
-		{
-			Memory::Write32(addr, cpu->ExtReg[inst_cream->d*2]);
-			Memory::Write32(addr + 4, cpu->ExtReg[inst_cream->d*2+1]);
-		}
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vstr_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vstr_inst *inst_cream = (vstr_inst *)inst_base->component;
+
+        unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]);
+        addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32);
+
+        if (inst_cream->single)
+        {
+            Memory::Write32(addr, cpu->ExtReg[inst_cream->d]);
+        }
+        else
+        {
+            Memory::Write32(addr, cpu->ExtReg[inst_cream->d*2]);
+            Memory::Write32(addr + 4, cpu->ExtReg[inst_cream->d*2+1]);
+        }
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vstr_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -2915,61 +2894,46 @@ DYNCOM_FILL_ACTION(vstr),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
-	*tag |= TAG_NEW_BB;
-	if(instr >> 28 != 0xe)
-		*tag |= TAG_CONDITIONAL;
+    int instr_size = INSTR_SIZE;
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+    *tag |= TAG_NEW_BB;
+    if(instr >> 28 != 0xe)
+        *tag |= TAG_CONDITIONAL;
 
-	return instr_size;
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	int single = BIT(8) == 0;
-	int add	   = BIT(23);
-	int imm32  = BITS(0,7) << 2;
-	int d      = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
-	int n	   = BITS(16, 19);
-
-	Value* base = (n == 15) ? ADD(AND(R(n), CONST(0xFFFFFFFC)), CONST(8)): R(n);
-	Value* Addr = add ? ADD(base, CONST(imm32)) : SUB(base, CONST(imm32));
-	DBG("VSTR :\n");
-	//if(single)
-	//	bb = arch_check_mm(cpu, bb, Addr, 4, 0, cpu->dyncom_engine->bb_trap);
-	//else
-	//	bb = arch_check_mm(cpu, bb, Addr, 8, 0, cpu->dyncom_engine->bb_trap);
-	//Value* phys_addr;
-	if(single){
-		#if 0
-		phys_addr = get_phys_addr(cpu, bb, Addr, 0);
-		bb = cpu->dyncom_engine->bb;
-		arch_write_memory(cpu, bb, phys_addr, RSPR(d), 32);
-		#endif
-		//memory_write(cpu, bb, Addr, RSPR(d), 32);
-		memory_write(cpu, bb, Addr, IBITCAST32(FR32(d)), 32);
-		bb = cpu->dyncom_engine->bb;
-	}
-	else{
-		#if 0
-		phys_addr = get_phys_addr(cpu, bb, Addr, 0);
-		bb = cpu->dyncom_engine->bb;
-		arch_write_memory(cpu, bb, phys_addr, RSPR(d * 2), 32);
-		#endif
-		//memory_write(cpu, bb, Addr, RSPR(d * 2), 32);
-		memory_write(cpu, bb, Addr, IBITCAST32(FR32(d * 2)), 32);
-		bb = cpu->dyncom_engine->bb;
-		#if 0
-		phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
-		bb = cpu->dyncom_engine->bb;
-		arch_write_memory(cpu, bb, phys_addr, RSPR(d * 2 + 1), 32);
-		#endif
-		//memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR(d * 2 + 1), 32);
-		memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32(d * 2 + 1)), 32);
-		bb = cpu->dyncom_engine->bb;
-	}
-	return No_exp;
+    int single = BIT(8) == 0;
+    int add       = BIT(23);
+    int imm32  = BITS(0,7) << 2;
+    int d      = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+    int n       = BITS(16, 19);
+
+    Value* base = (n == 15) ? ADD(AND(R(n), CONST(0xFFFFFFFC)), CONST(8)): R(n);
+    Value* Addr = add ? ADD(base, CONST(imm32)) : SUB(base, CONST(imm32));
+    DBG("VSTR :\n");
+    //if(single)
+    //    bb = arch_check_mm(cpu, bb, Addr, 4, 0, cpu->dyncom_engine->bb_trap);
+    //else
+    //    bb = arch_check_mm(cpu, bb, Addr, 8, 0, cpu->dyncom_engine->bb_trap);
+    //Value* phys_addr;
+    if(single){
+        //memory_write(cpu, bb, Addr, RSPR(d), 32);
+        memory_write(cpu, bb, Addr, IBITCAST32(FR32(d)), 32);
+        bb = cpu->dyncom_engine->bb;
+    }
+    else{
+        //memory_write(cpu, bb, Addr, RSPR(d * 2), 32);
+        memory_write(cpu, bb, Addr, IBITCAST32(FR32(d * 2)), 32);
+        bb = cpu->dyncom_engine->bb;
+        //memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR(d * 2 + 1), 32);
+        memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32(d * 2 + 1)), 32);
+        bb = cpu->dyncom_engine->bb;
+    }
+    return No_exp;
 }
 #endif
 
@@ -2978,64 +2942,64 @@ int DYNCOM_TRANS(vstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1101 0D10 1101 Vd-- 101X imm8 imm8 */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vpush_inst {
-	unsigned int single;
-	unsigned int d;
-	unsigned int imm32;
-	unsigned int regs;
+    unsigned int single;
+    unsigned int d;
+    unsigned int imm32;
+    unsigned int regs;
 } vpush_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vpush)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vpush_inst));
-	vpush_inst *inst_cream = (vpush_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx     = index;
-	inst_base->br     = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vpush_inst));
+    vpush_inst *inst_cream = (vpush_inst *)inst_base->component;
 
-	inst_cream->single  = BIT(inst, 8) == 0;
-	inst_cream->d       = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
-	inst_cream->imm32   = BITS(inst, 0, 7)<<2;
-	inst_cream->regs    = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	return inst_base;
+    inst_cream->single  = BIT(inst, 8) == 0;
+    inst_cream->d       = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+    inst_cream->imm32   = BITS(inst, 0, 7)<<2;
+    inst_cream->regs    = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VPUSH_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		int i;
-
-		vpush_inst *inst_cream = (vpush_inst *)inst_base->component;
-
-		addr = cpu->Reg[R13] - inst_cream->imm32;
-
-		for (i = 0; i < inst_cream->regs; i++)
-		{
-			if (inst_cream->single)
-			{
-				Memory::Write32(addr, cpu->ExtReg[inst_cream->d+i]);
-				addr += 4;
-			}
-			else
-			{
-				Memory::Write32(addr, cpu->ExtReg[(inst_cream->d+i)*2]);
-				Memory::Write32(addr + 4, cpu->ExtReg[(inst_cream->d+i)*2 + 1]);
-				addr += 8;
-			}
-		}
-		cpu->Reg[R13] = cpu->Reg[R13] - inst_cream->imm32;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vpush_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+        int i;
+
+        vpush_inst *inst_cream = (vpush_inst *)inst_base->component;
+
+        addr = cpu->Reg[R13] - inst_cream->imm32;
+
+        for (i = 0; i < inst_cream->regs; i++)
+        {
+            if (inst_cream->single)
+            {
+                Memory::Write32(addr, cpu->ExtReg[inst_cream->d+i]);
+                addr += 4;
+            }
+            else
+            {
+                Memory::Write32(addr, cpu->ExtReg[(inst_cream->d+i)*2]);
+                Memory::Write32(addr + 4, cpu->ExtReg[(inst_cream->d+i)*2 + 1]);
+                addr += 8;
+            }
+        }
+        cpu->Reg[R13] = cpu->Reg[R13] - inst_cream->imm32;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vpush_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 #ifdef VFP_DYNCOM_TABLE
@@ -3044,72 +3008,57 @@ DYNCOM_FILL_ACTION(vpush),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vpush)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
-	*tag |= TAG_NEW_BB;
-	if(instr >> 28 != 0xe)
-		*tag |= TAG_CONDITIONAL;
+    int instr_size = INSTR_SIZE;
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+    *tag |= TAG_NEW_BB;
+    if(instr >> 28 != 0xe)
+        *tag |= TAG_CONDITIONAL;
 
-	return instr_size;
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vpush)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	int single  = BIT(8) == 0;
-	int d       = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
-	int imm32   = BITS(0, 7)<<2;
-	int regs    = (single ? BITS(0, 7) : BITS(1, 7));
-
-	DBG("\t\tin %s \n", __FUNCTION__);
-	Value* Addr = SUB(R(13), CONST(imm32));
-	//if(single)
-	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap);
-	//else
-	//	bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap);
-	//Value* phys_addr;
-	int i;
-	for (i = 0; i < regs; i++)
-	{
-		if (single)
-		{
-			//Memory::Write32(addr, cpu->ExtReg[inst_cream->d+i]);
-			#if 0
-			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
-			bb = cpu->dyncom_engine->bb;
-			arch_write_memory(cpu, bb, phys_addr, RSPR(d + i), 32);
-			#endif
-			//memory_write(cpu, bb, Addr, RSPR(d + i), 32);
-			memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)), 32);
-			bb = cpu->dyncom_engine->bb;
-			Addr = ADD(Addr, CONST(4));
-		}
-		else
-		{
-			/* Careful of endianness, little by default */
-			#if 0
-			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
-			bb = cpu->dyncom_engine->bb;
-			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2), 32);
-			#endif
-			//memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32);
-			memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)), 32);
-			bb = cpu->dyncom_engine->bb;
-			#if 0
-			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
-			bb = cpu->dyncom_engine->bb;
-			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2 + 1), 32);
-			#endif
-			//memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32);
-			memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32);
-			bb = cpu->dyncom_engine->bb;
-
-			Addr = ADD(Addr, CONST(8));
-		}
-	}
-	LET(13, SUB(R(13), CONST(imm32)));
-
-	return No_exp;
+    int single  = BIT(8) == 0;
+    int d       = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+    int imm32   = BITS(0, 7)<<2;
+    int regs    = (single ? BITS(0, 7) : BITS(1, 7));
+
+    DBG("\t\tin %s \n", __FUNCTION__);
+    Value* Addr = SUB(R(13), CONST(imm32));
+    //if(single)
+    //    bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap);
+    //else
+    //    bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap);
+    //Value* phys_addr;
+    int i;
+    for (i = 0; i < regs; i++)
+    {
+        if (single)
+        {
+            //Memory::Write32(addr, cpu->ExtReg[inst_cream->d+i]);
+            //memory_write(cpu, bb, Addr, RSPR(d + i), 32);
+            memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)), 32);
+            bb = cpu->dyncom_engine->bb;
+            Addr = ADD(Addr, CONST(4));
+        }
+        else
+        {
+            /* Careful of endianness, little by default */
+            //memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32);
+            memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)), 32);
+            bb = cpu->dyncom_engine->bb;
+            //memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32);
+            memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32);
+            bb = cpu->dyncom_engine->bb;
+
+            Addr = ADD(Addr, CONST(8));
+        }
+    }
+    LET(13, SUB(R(13), CONST(imm32)));
+
+    return No_exp;
 }
 #endif
 
@@ -3118,76 +3067,76 @@ int DYNCOM_TRANS(vpush)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 110P UDW0 Rn-- Vd-- 101X imm8 imm8 */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vstm_inst {
-	unsigned int single;
-	unsigned int add;
-	unsigned int wback;
-	unsigned int d;
-	unsigned int n;
-	unsigned int imm32;
-	unsigned int regs;
+    unsigned int single;
+    unsigned int add;
+    unsigned int wback;
+    unsigned int d;
+    unsigned int n;
+    unsigned int imm32;
+    unsigned int regs;
 } vstm_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vstm)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vstm_inst));
-	vstm_inst *inst_cream = (vstm_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vstm_inst));
+    vstm_inst *inst_cream = (vstm_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx     = index;
-	inst_base->br     = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->single = BIT(inst, 8) == 0;
-	inst_cream->add    = BIT(inst, 23);
-	inst_cream->wback  = BIT(inst, 21);
-	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
-	inst_cream->n      = BITS(inst, 16, 19);
-	inst_cream->imm32  = BITS(inst, 0, 7)<<2;
-	inst_cream->regs   = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+    inst_cream->single = BIT(inst, 8) == 0;
+    inst_cream->add    = BIT(inst, 23);
+    inst_cream->wback  = BIT(inst, 21);
+    inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+    inst_cream->n      = BITS(inst, 16, 19);
+    inst_cream->imm32  = BITS(inst, 0, 7)<<2;
+    inst_cream->regs   = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
 
-	return inst_base;
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VSTM_INST: /* encoding 1 */
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		int i;
-		
-		vstm_inst *inst_cream = (vstm_inst *)inst_base->component;
-		
-		addr = (inst_cream->add ? cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32);
-
-		for (i = 0; i < inst_cream->regs; i++)
-		{
-			if (inst_cream->single)
-			{
-				Memory::Write32(addr, cpu->ExtReg[inst_cream->d+i]);
-				addr += 4;
-			}
-			else
-			{
-				Memory::Write32(addr, cpu->ExtReg[(inst_cream->d+i)*2]);
-				Memory::Write32(addr + 4, cpu->ExtReg[(inst_cream->d+i)*2 + 1]);
-				addr += 8;
-			}
-		}
-		if (inst_cream->wback){
-			cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 : 
-						   cpu->Reg[inst_cream->n] - inst_cream->imm32);
-		}
-
-	}
-	cpu->Reg[15] += 4;
-	INC_PC(sizeof(vstm_inst));
-
-	FETCH_INST;
-	GOTO_NEXT_INST;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        int i;
+
+        vstm_inst *inst_cream = (vstm_inst *)inst_base->component;
+
+        addr = (inst_cream->add ? cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32);
+
+        for (i = 0; i < inst_cream->regs; i++)
+        {
+            if (inst_cream->single)
+            {
+                Memory::Write32(addr, cpu->ExtReg[inst_cream->d+i]);
+                addr += 4;
+            }
+            else
+            {
+                Memory::Write32(addr, cpu->ExtReg[(inst_cream->d+i)*2]);
+                Memory::Write32(addr + 4, cpu->ExtReg[(inst_cream->d+i)*2 + 1]);
+                addr += 8;
+            }
+        }
+        if (inst_cream->wback){
+            cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 : 
+                cpu->Reg[inst_cream->n] - inst_cream->imm32);
+        }
+
+    }
+    cpu->Reg[15] += 4;
+    INC_PC(sizeof(vstm_inst));
+
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -3197,90 +3146,75 @@ DYNCOM_FILL_ACTION(vstm),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vstm)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
-	*tag |= TAG_NEW_BB;
-	if(instr >> 28 != 0xe)
-		*tag |= TAG_CONDITIONAL;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+    *tag |= TAG_NEW_BB;
+    if(instr >> 28 != 0xe)
+        *tag |= TAG_CONDITIONAL;
 
-	return instr_size;
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vstm)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	//arch_arm_undef(cpu, bb, instr);
-	int single = BIT(8) == 0;
-	int add    = BIT(23);
-	int wback  = BIT(21);
-	int d      = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4);
-	int n      = BITS(16, 19);
-	int imm32  = BITS(0, 7)<<2;
-	int regs   = single ? BITS(0, 7) : BITS(1, 7);
-
-	Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32)));
-	DBG("VSTM \n");
-	//if(single)
-	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap);
-	//else
-	//	bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap);
-
-	int i;	
-	Value* phys_addr;
-	for (i = 0; i < regs; i++)
-	{
-		if (single)
-		{
-			
-			//Memory::Write32(addr, cpu->ExtReg[inst_cream->d+i]);
-			/* if R(i) is R15? */
-			#if 0
-			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
-			bb = cpu->dyncom_engine->bb;
-			arch_write_memory(cpu, bb, phys_addr, RSPR(d + i), 32);
-			#endif
-			//memory_write(cpu, bb, Addr, RSPR(d + i), 32);
-			memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)),32);
-			bb = cpu->dyncom_engine->bb;
-			//DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
-			Addr = ADD(Addr, CONST(4));
-		}
-		else
-		{
-		
-			//Memory::Write32(addr, cpu->ExtReg[(inst_cream->d+i)*2]);
-			#if 0
-			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
-			bb = cpu->dyncom_engine->bb;
-			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2), 32);
-			#endif
-			//memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32);
-			memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)),32);
-			bb = cpu->dyncom_engine->bb;
-
-			//Memory::Write32(addr + 4, cpu->ExtReg[(inst_cream->d+i)*2 + 1]);
-			#if 0
-			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
-			bb = cpu->dyncom_engine->bb;
-			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2 + 1), 32);
-			#endif
-			//memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32);
-			memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32);
-			bb = cpu->dyncom_engine->bb;
-			//DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
-			//addr += 8;
-			Addr = ADD(Addr, CONST(8));
-		}
-	}
-	if (wback){
-		//cpu->Reg[n] = (add ? cpu->Reg[n] + imm32 : 
-		//			   cpu->Reg[n] - imm32);
-		LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32))));
-		DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32);
-	}
-	return No_exp;
+    //arch_arm_undef(cpu, bb, instr);
+    int single = BIT(8) == 0;
+    int add    = BIT(23);
+    int wback  = BIT(21);
+    int d      = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4);
+    int n      = BITS(16, 19);
+    int imm32  = BITS(0, 7)<<2;
+    int regs   = single ? BITS(0, 7) : BITS(1, 7);
+
+    Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32)));
+    DBG("VSTM \n");
+    //if(single)
+    //    bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap);
+    //else
+    //    bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap);
+
+    int i;    
+    Value* phys_addr;
+    for (i = 0; i < regs; i++)
+    {
+        if (single)
+        {
+
+            //Memory::Write32(addr, cpu->ExtReg[inst_cream->d+i]);
+            /* if R(i) is R15? */
+            //memory_write(cpu, bb, Addr, RSPR(d + i), 32);
+            memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)),32);
+            bb = cpu->dyncom_engine->bb;
+            //DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
+            Addr = ADD(Addr, CONST(4));
+        }
+        else
+        {
+
+            //Memory::Write32(addr, cpu->ExtReg[(inst_cream->d+i)*2]);
+            //memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32);
+            memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)),32);
+            bb = cpu->dyncom_engine->bb;
+
+            //Memory::Write32(addr + 4, cpu->ExtReg[(inst_cream->d+i)*2 + 1]);
+            //memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32);
+            memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32);
+            bb = cpu->dyncom_engine->bb;
+            //DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
+            //addr += 8;
+            Addr = ADD(Addr, CONST(8));
+        }
+    }
+    if (wback){
+        //cpu->Reg[n] = (add ? cpu->Reg[n] + imm32 : 
+        //               cpu->Reg[n] - imm32);
+        LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32))));
+        DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32);
+    }
+    return No_exp;
 }
 #endif
 
@@ -3289,69 +3223,69 @@ int DYNCOM_TRANS(vstm)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1100 1D11 1101 Vd-- 101X imm8 imm8 */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vpop_inst {
-	unsigned int single;
-	unsigned int d;
-	unsigned int imm32;
-	unsigned int regs;
+    unsigned int single;
+    unsigned int d;
+    unsigned int imm32;
+    unsigned int regs;
 } vpop_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vpop)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vpop_inst));
-	vpop_inst *inst_cream = (vpop_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vpop_inst));
+    vpop_inst *inst_cream = (vpop_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->single  = BIT(inst, 8) == 0;
-	inst_cream->d       = (inst_cream->single ? (BITS(inst, 12, 15)<<1)|BIT(inst, 22) : BITS(inst, 12, 15)|(BIT(inst, 22)<<4));
-	inst_cream->imm32   = BITS(inst, 0, 7)<<2;
-	inst_cream->regs    = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
-	
-	return inst_base;
+    inst_cream->single  = BIT(inst, 8) == 0;
+    inst_cream->d       = (inst_cream->single ? (BITS(inst, 12, 15)<<1)|BIT(inst, 22) : BITS(inst, 12, 15)|(BIT(inst, 22)<<4));
+    inst_cream->imm32   = BITS(inst, 0, 7)<<2;
+    inst_cream->regs    = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VPOP_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		int i;
-		unsigned int value1, value2;
-
-		vpop_inst *inst_cream = (vpop_inst *)inst_base->component;
-
-		addr = cpu->Reg[R13];
-
-		for (i = 0; i < inst_cream->regs; i++)
-		{
-			if (inst_cream->single)
-			{
-				value1 = Memory::Read32(addr);
-				cpu->ExtReg[inst_cream->d+i] = value1;
-				addr += 4;
-			}
-			else
-			{
-				value1 = Memory::Read32(addr);
-				value2 = Memory::Read32(addr + 4);
-				cpu->ExtReg[(inst_cream->d+i)*2] = value1;
-				cpu->ExtReg[(inst_cream->d+i)*2 + 1] = value2;
-				addr += 8;
-			}
-		}
-		cpu->Reg[R13] = cpu->Reg[R13] + inst_cream->imm32;
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vpop_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        int i;
+        unsigned int value1, value2;
+
+        vpop_inst *inst_cream = (vpop_inst *)inst_base->component;
+
+        addr = cpu->Reg[R13];
+
+        for (i = 0; i < inst_cream->regs; i++)
+        {
+            if (inst_cream->single)
+            {
+                value1 = Memory::Read32(addr);
+                cpu->ExtReg[inst_cream->d+i] = value1;
+                addr += 4;
+            }
+            else
+            {
+                value1 = Memory::Read32(addr);
+                value2 = Memory::Read32(addr + 4);
+                cpu->ExtReg[(inst_cream->d+i)*2] = value1;
+                cpu->ExtReg[(inst_cream->d+i)*2 + 1] = value2;
+                addr += 8;
+            }
+        }
+        cpu->Reg[R13] = cpu->Reg[R13] + inst_cream->imm32;
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vpop_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -3361,82 +3295,67 @@ DYNCOM_FILL_ACTION(vpop),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vpop)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	/* Should check if PC is destination register */
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
-	*tag |= TAG_NEW_BB;
-	if(instr >> 28 != 0xe)
-		*tag |= TAG_CONDITIONAL;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    /* Should check if PC is destination register */
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+    *tag |= TAG_NEW_BB;
+    if(instr >> 28 != 0xe)
+        *tag |= TAG_CONDITIONAL;
 
-	return instr_size;
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vpop)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	DBG("\t\tin %s instruction .\n", __FUNCTION__);
-	//arch_arm_undef(cpu, bb, instr);
-	int single  = BIT(8) == 0;
-	int d       = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
-	int imm32   = BITS(0, 7)<<2;
-	int regs    = (single ? BITS(0, 7) : BITS(1, 7));
-
-	int i;
-	unsigned int value1, value2;
-
-	DBG("VPOP :\n");
-		
-	Value* Addr = R(13);
-	Value* val;
-	//if(single)
-	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
-	//else
-	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
-	//Value* phys_addr;	
-	for (i = 0; i < regs; i++)
-	{
-		if (single)
-		{
-			#if 0
-			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
-			bb = cpu->dyncom_engine->bb;
-			val = arch_read_memory(cpu,bb,phys_addr,0,32);
-			#endif
-			memory_read(cpu, bb, Addr, 0, 32);
-			bb = cpu->dyncom_engine->bb;
-			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
-			LETFPS(d + i, FPBITCAST32(val));
-			Addr = ADD(Addr, CONST(4));
-		}
-		else
-		{
-			/* Careful of endianness, little by default */
-			#if 0
-			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
-			bb = cpu->dyncom_engine->bb;
-			val = arch_read_memory(cpu,bb,phys_addr,0,32);
-			#endif
-			memory_read(cpu, bb, Addr, 0, 32);
-			bb = cpu->dyncom_engine->bb;
-			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
-			LETFPS((d + i) * 2, FPBITCAST32(val));
-			#if 0
-			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
-			bb = cpu->dyncom_engine->bb;
-			val = arch_read_memory(cpu,bb,phys_addr,0,32);
-			#endif
-			memory_read(cpu, bb, ADD(Addr, CONST(4)), 0, 32);
-			bb = cpu->dyncom_engine->bb;
-			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
-			LETFPS((d + i) * 2 + 1, FPBITCAST32(val));
-
-			Addr = ADD(Addr, CONST(8));
-		}
-	}
-	LET(13, ADD(R(13), CONST(imm32)));
-	return No_exp;
+    DBG("\t\tin %s instruction .\n", __FUNCTION__);
+    //arch_arm_undef(cpu, bb, instr);
+    int single  = BIT(8) == 0;
+    int d       = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+    int imm32   = BITS(0, 7)<<2;
+    int regs    = (single ? BITS(0, 7) : BITS(1, 7));
+
+    int i;
+    unsigned int value1, value2;
+
+    DBG("VPOP :\n");
+
+    Value* Addr = R(13);
+    Value* val;
+    //if(single)
+    //    bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+    //else
+    //    bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+    //Value* phys_addr;    
+    for (i = 0; i < regs; i++)
+    {
+        if (single)
+        {
+            memory_read(cpu, bb, Addr, 0, 32);
+            bb = cpu->dyncom_engine->bb;
+            val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+            LETFPS(d + i, FPBITCAST32(val));
+            Addr = ADD(Addr, CONST(4));
+        }
+        else
+        {
+            /* Careful of endianness, little by default */
+            memory_read(cpu, bb, Addr, 0, 32);
+            bb = cpu->dyncom_engine->bb;
+            val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+            LETFPS((d + i) * 2, FPBITCAST32(val));
+            memory_read(cpu, bb, ADD(Addr, CONST(4)), 0, 32);
+            bb = cpu->dyncom_engine->bb;
+            val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+            LETFPS((d + i) * 2 + 1, FPBITCAST32(val));
+
+            Addr = ADD(Addr, CONST(8));
+        }
+    }
+    LET(13, ADD(R(13), CONST(imm32)));
+    return No_exp;
 }
 #endif
 
@@ -3445,64 +3364,64 @@ int DYNCOM_TRANS(vpop)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 1101 UD01 Rn-- Vd-- 101X imm8 imm8 */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vldr_inst {
-	unsigned int single;
-	unsigned int n;
-	unsigned int d;
-	unsigned int imm32;
-	unsigned int add;
+    unsigned int single;
+    unsigned int n;
+    unsigned int d;
+    unsigned int imm32;
+    unsigned int add;
 } vldr_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vldr)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vldr_inst));
-	vldr_inst *inst_cream = (vldr_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vldr_inst));
+    vldr_inst *inst_cream = (vldr_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx	 = index;
-	inst_base->br	 = NON_BRANCH;
-	inst_base->load_r15 = 0;
-	
-	inst_cream->single = BIT(inst, 8) == 0;
-	inst_cream->add	   = BIT(inst, 23);
-	inst_cream->imm32  = BITS(inst, 0,7) << 2;
-	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
-	inst_cream->n	   = BITS(inst, 16, 19);
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	return inst_base;
+    inst_cream->single = BIT(inst, 8) == 0;
+    inst_cream->add       = BIT(inst, 23);
+    inst_cream->imm32  = BITS(inst, 0,7) << 2;
+    inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+    inst_cream->n       = BITS(inst, 16, 19);
+
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VLDR_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		vldr_inst *inst_cream = (vldr_inst *)inst_base->component;
-		
-		unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]);
-		addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32);
-
-		if (inst_cream->single)
-		{
-			cpu->ExtReg[inst_cream->d] = Memory::Read32(addr);
-		}
-		else
-		{
-			unsigned int word1, word2;
-			word1 = Memory::Read32(addr);
-			word2 = Memory::Read32(addr + 4);
-
-			cpu->ExtReg[inst_cream->d*2] = word1;
-			cpu->ExtReg[inst_cream->d*2+1] = word2;
-		}
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vldr_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        vldr_inst *inst_cream = (vldr_inst *)inst_base->component;
+
+        unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]);
+        addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32);
+
+        if (inst_cream->single)
+        {
+            cpu->ExtReg[inst_cream->d] = Memory::Read32(addr);
+        }
+        else
+        {
+            unsigned int word1, word2;
+            word1 = Memory::Read32(addr);
+            word2 = Memory::Read32(addr + 4);
+
+            cpu->ExtReg[inst_cream->d*2] = word1;
+            cpu->ExtReg[inst_cream->d*2+1] = word2;
+        }
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vldr_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -3512,76 +3431,61 @@ DYNCOM_FILL_ACTION(vldr),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vldr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	/* Should check if PC is destination register */
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
-	*tag |= TAG_NEW_BB;
-	if(instr >> 28 != 0xe)
-		*tag |= TAG_CONDITIONAL;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    /* Should check if PC is destination register */
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+    *tag |= TAG_NEW_BB;
+    if(instr >> 28 != 0xe)
+        *tag |= TAG_CONDITIONAL;
 
-	return instr_size;
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vldr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	int single = BIT(8) == 0;
-	int add    = BIT(23);
-	int wback  = BIT(21);
-	int d      = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
-	int n      = BITS(16, 19);
-	int imm32  = BITS(0, 7)<<2;
-	int regs   = (single ? BITS(0, 7) : BITS(1, 7));
-	Value* base = R(n);
-	DBG("\t\tin %s .\n", __FUNCTION__);
-	if(n == 15){
-		base = ADD(AND(base, CONST(0xFFFFFFFC)), CONST(8));
-	}
-	Value* Addr = add ? (ADD(base, CONST(imm32))) : (SUB(base, CONST(imm32)));
-	//if(single)
-	//	bb = arch_check_mm(cpu, bb, Addr, 4, 1, cpu->dyncom_engine->bb_trap);
-	//else
-	//	bb = arch_check_mm(cpu, bb, Addr, 8, 1, cpu->dyncom_engine->bb_trap);
-	//Value* phys_addr;
-	Value* val;
-	if(single){
-		#if 0
-		phys_addr = get_phys_addr(cpu, bb, Addr, 1);
-		bb = cpu->dyncom_engine->bb;
-		val = arch_read_memory(cpu,bb,phys_addr,0,32);
-		#endif
-		memory_read(cpu, bb, Addr, 0, 32);
-		bb = cpu->dyncom_engine->bb;
-		val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
-		//LETS(d, val);
-		LETFPS(d,FPBITCAST32(val));
-	}
-	else{
-		#if 0
-		phys_addr = get_phys_addr(cpu, bb, Addr, 1);
-		bb = cpu->dyncom_engine->bb;
-		val = arch_read_memory(cpu,bb,phys_addr,0,32);
-		#endif
-		memory_read(cpu, bb, Addr, 0, 32);
-		bb = cpu->dyncom_engine->bb;
-		val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
-		//LETS(d * 2, val);
-		LETFPS(d * 2,FPBITCAST32(val));
-		#if 0
-		phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
-		bb = cpu->dyncom_engine->bb;
-		val = arch_read_memory(cpu,bb,phys_addr,0,32);
-		#endif
-		memory_read(cpu, bb, ADD(Addr, CONST(4)), 0,32);
-		bb = cpu->dyncom_engine->bb;
-		val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
-		//LETS(d * 2 + 1, val);
-		LETFPS( d * 2 + 1,FPBITCAST32(val));
-	}
-
-	return No_exp;
+    int single = BIT(8) == 0;
+    int add    = BIT(23);
+    int wback  = BIT(21);
+    int d      = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+    int n      = BITS(16, 19);
+    int imm32  = BITS(0, 7)<<2;
+    int regs   = (single ? BITS(0, 7) : BITS(1, 7));
+    Value* base = R(n);
+    DBG("\t\tin %s .\n", __FUNCTION__);
+    if(n == 15){
+        base = ADD(AND(base, CONST(0xFFFFFFFC)), CONST(8));
+    }
+    Value* Addr = add ? (ADD(base, CONST(imm32))) : (SUB(base, CONST(imm32)));
+    //if(single)
+    //    bb = arch_check_mm(cpu, bb, Addr, 4, 1, cpu->dyncom_engine->bb_trap);
+    //else
+    //    bb = arch_check_mm(cpu, bb, Addr, 8, 1, cpu->dyncom_engine->bb_trap);
+    //Value* phys_addr;
+    Value* val;
+    if(single){
+        memory_read(cpu, bb, Addr, 0, 32);
+        bb = cpu->dyncom_engine->bb;
+        val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+        //LETS(d, val);
+        LETFPS(d,FPBITCAST32(val));
+    }
+    else{
+        memory_read(cpu, bb, Addr, 0, 32);
+        bb = cpu->dyncom_engine->bb;
+        val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+        //LETS(d * 2, val);
+        LETFPS(d * 2,FPBITCAST32(val));
+        memory_read(cpu, bb, ADD(Addr, CONST(4)), 0,32);
+        bb = cpu->dyncom_engine->bb;
+        val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+        //LETS(d * 2 + 1, val);
+        LETFPS( d * 2 + 1,FPBITCAST32(val));
+    }
+
+    return No_exp;
 }
 #endif
 
@@ -3590,76 +3494,76 @@ int DYNCOM_TRANS(vldr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
 /* cond 110P UDW1 Rn-- Vd-- 101X imm8 imm8 */
 #ifdef VFP_INTERPRETER_STRUCT
 typedef struct _vldm_inst {
-	unsigned int single;
-	unsigned int add;
-	unsigned int wback;
-	unsigned int d;
-	unsigned int n;
-	unsigned int imm32;
-	unsigned int regs;
+    unsigned int single;
+    unsigned int add;
+    unsigned int wback;
+    unsigned int d;
+    unsigned int n;
+    unsigned int imm32;
+    unsigned int regs;
 } vldm_inst;
 #endif
 #ifdef VFP_INTERPRETER_TRANS
 ARM_INST_PTR INTERPRETER_TRANSLATE(vldm)(unsigned int inst, int index)
 {
-	VFP_DEBUG_TRANSLATE;
-	
-	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vldm_inst));
-	vldm_inst *inst_cream = (vldm_inst *)inst_base->component;
+    VFP_DEBUG_TRANSLATE;
+
+    arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vldm_inst));
+    vldm_inst *inst_cream = (vldm_inst *)inst_base->component;
 
-	inst_base->cond  = BITS(inst, 28, 31);
-	inst_base->idx     = index;
-	inst_base->br     = NON_BRANCH;
-	inst_base->load_r15 = 0;
+    inst_base->cond  = BITS(inst, 28, 31);
+    inst_base->idx     = index;
+    inst_base->br     = NON_BRANCH;
+    inst_base->load_r15 = 0;
 
-	inst_cream->single = BIT(inst, 8) == 0;
-	inst_cream->add    = BIT(inst, 23);
-	inst_cream->wback  = BIT(inst, 21);
-	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
-	inst_cream->n      = BITS(inst, 16, 19);
-	inst_cream->imm32  = BITS(inst, 0, 7)<<2;
-	inst_cream->regs   = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+    inst_cream->single = BIT(inst, 8) == 0;
+    inst_cream->add    = BIT(inst, 23);
+    inst_cream->wback  = BIT(inst, 21);
+    inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+    inst_cream->n      = BITS(inst, 16, 19);
+    inst_cream->imm32  = BITS(inst, 0, 7)<<2;
+    inst_cream->regs   = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
 
-	return inst_base;
+    return inst_base;
 }
 #endif
 #ifdef VFP_INTERPRETER_IMPL
 VLDM_INST:
 {
-	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-		CHECK_VFP_ENABLED;
-		
-		int i;
-		
-		vldm_inst *inst_cream = (vldm_inst *)inst_base->component;
-		
-		addr = (inst_cream->add ? cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32);
-
-		for (i = 0; i < inst_cream->regs; i++)
-		{
-			if (inst_cream->single)
-			{
-				cpu->ExtReg[inst_cream->d+i] = Memory::Read32(addr);
-				addr += 4;
-			}
-			else
-			{
-				cpu->ExtReg[(inst_cream->d+i)*2] = Memory::Read32(addr);
-				cpu->ExtReg[(inst_cream->d+i)*2 + 1] = Memory::Read32(addr + 4);
-				addr += 8;
-			}
-		}
-		if (inst_cream->wback){
-			cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 : 
-						   cpu->Reg[inst_cream->n] - inst_cream->imm32);
-			DBG("\twback r%d[%x]\n", inst_cream->n, cpu->Reg[inst_cream->n]);
-		}
-
-	}
-	cpu->Reg[15] += GET_INST_SIZE(cpu);
-	INC_PC(sizeof(vldm_inst));
-	FETCH_INST;
-	GOTO_NEXT_INST;
+    if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+        CHECK_VFP_ENABLED;
+
+        int i;
+
+        vldm_inst *inst_cream = (vldm_inst *)inst_base->component;
+
+        addr = (inst_cream->add ? cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32);
+
+        for (i = 0; i < inst_cream->regs; i++)
+        {
+            if (inst_cream->single)
+            {
+                cpu->ExtReg[inst_cream->d+i] = Memory::Read32(addr);
+                addr += 4;
+            }
+            else
+            {
+                cpu->ExtReg[(inst_cream->d+i)*2] = Memory::Read32(addr);
+                cpu->ExtReg[(inst_cream->d+i)*2 + 1] = Memory::Read32(addr + 4);
+                addr += 8;
+            }
+        }
+        if (inst_cream->wback){
+            cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 : 
+                cpu->Reg[inst_cream->n] - inst_cream->imm32);
+            DBG("\twback r%d[%x]\n", inst_cream->n, cpu->Reg[inst_cream->n]);
+        }
+
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vldm_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
 }
 #endif
 
@@ -3669,91 +3573,76 @@ DYNCOM_FILL_ACTION(vldm),
 #ifdef VFP_DYNCOM_TAG
 int DYNCOM_TAG(vldm)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
 {
-	int instr_size = INSTR_SIZE;
-	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
-	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
-	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
-	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
-	*tag |= TAG_NEW_BB;
-	if(instr >> 28 != 0xe)
-		*tag |= TAG_CONDITIONAL;
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+    *tag |= TAG_NEW_BB;
+    if(instr >> 28 != 0xe)
+        *tag |= TAG_CONDITIONAL;
 
-	return instr_size;
+    return instr_size;
 }
 #endif
 #ifdef VFP_DYNCOM_TRANS
 int DYNCOM_TRANS(vldm)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
-	int single = BIT(8) == 0;
-	int add    = BIT(23);
-	int wback  = BIT(21);
-	int d      = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|BIT(22)<<4;
-	int n      = BITS(16, 19);
-	int imm32  = BITS(0, 7)<<2;
-	int regs   = single ? BITS(0, 7) : BITS(1, 7);
-
-	Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32)));
-	//if(single)
-	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
-	//else
-	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
-
-	DBG("VLDM \n");
-	int i;	
-	//Value* phys_addr;
-	Value* val;
-	for (i = 0; i < regs; i++)
-	{
-		if (single)
-		{
-			
-			//Memory::Write32(addr, cpu->ExtReg[inst_cream->d+i]);
-			/* if R(i) is R15? */
-			#if 0
-			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
-			bb = cpu->dyncom_engine->bb;
-			val = arch_read_memory(cpu,bb,phys_addr,0,32);
-			#endif
-			memory_read(cpu, bb, Addr, 0, 32);
-			bb = cpu->dyncom_engine->bb;
-			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
-			//LETS(d + i, val);
-			LETFPS(d + i, FPBITCAST32(val));
-			//DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
-			Addr = ADD(Addr, CONST(4));
-		}
-		else
-		{
-			#if 0	
-			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
-			bb = cpu->dyncom_engine->bb;
-			val = arch_read_memory(cpu,bb,phys_addr,0,32);
-			#endif
-			memory_read(cpu, bb, Addr, 0, 32);
-			bb = cpu->dyncom_engine->bb;
-			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
-			LETFPS((d + i) * 2, FPBITCAST32(val));
-			#if 0
-			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
-			bb = cpu->dyncom_engine->bb;
-			val = arch_read_memory(cpu,bb,phys_addr,0,32);
-			#endif
-			memory_read(cpu, bb, Addr, 0, 32);
-			bb = cpu->dyncom_engine->bb;
-			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
-			LETFPS((d + i) * 2 + 1, FPBITCAST32(val));
-
-			//Memory::Write(addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
-			//DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
-			//addr += 8;
-			Addr = ADD(Addr, CONST(8));
-		}
-	}
-	if (wback){
-		//cpu->Reg[n] = (add ? cpu->Reg[n] + imm32 : 
-		//			   cpu->Reg[n] - imm32);
-		LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32))));
-		DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32);
-	}
-	return No_exp;
+    // Builds the translated code for VLDM: loads `regs` consecutive VFP registers, starting at d, from memory at Rn.
+    // Fields follow the encoding "cond 110P UDW1 Rn-- Vd-- 101X imm8 imm8".
+    int single = BIT(8) == 0;
+    int add    = BIT(23);
+    int wback  = BIT(21);
+    int d      = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|BIT(22)<<4;
+    int n      = BITS(16, 19);
+    int imm32  = BITS(0, 7)<<2;
+    int regs   = single ? BITS(0, 7) : BITS(1, 7);
+
+    // Transfer starts at Rn when the add bit is set, otherwise at Rn - imm32.
+    Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32)));
+    //if(single)
+    //    bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+    //else
+    //    bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+
+    DBG("VLDM \n");
+    Value* val;
+    for (int i = 0; i < regs; i++)
+    {
+        if (single)
+        {
+            // Single precision: one 32-bit word goes into FP slot d + i.
+            /* if R(i) is R15? */
+            memory_read(cpu, bb, Addr, 0, 32);
+            // Re-read the engine's current block after every memory_read
+            // (presumably the helper can switch blocks -- TODO confirm).
+            bb = cpu->dyncom_engine->bb;
+            val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+            LETFPS(d + i, FPBITCAST32(val));
+            Addr = ADD(Addr, CONST(4));
+        }
+        else
+        {
+            // Double precision: register d + i fills slots (d + i) * 2 and (d + i) * 2 + 1,
+            // the first from Addr and the second from Addr + 4 (as the interpreter's VLDM_INST does).
+            memory_read(cpu, bb, Addr, 0, 32);
+            bb = cpu->dyncom_engine->bb;
+            val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+            LETFPS((d + i) * 2, FPBITCAST32(val));
+            // The second word must be read from Addr + 4; reading Addr again
+            // would copy the first word into both halves.
+            memory_read(cpu, bb, ADD(Addr, CONST(4)), 0, 32);
+            bb = cpu->dyncom_engine->bb;
+            val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+            LETFPS((d + i) * 2 + 1, FPBITCAST32(val));
+
+            Addr = ADD(Addr, CONST(8));
+        }
+    }
+    if (wback){
+        // Write-back: Rn moves by imm32 in the direction given by the add bit.
+        LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32))));
+        DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32);
+    }
+    return No_exp;
 }
 #endif
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 22213d647..8ac4481cc 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -53,10 +53,10 @@ int Init() {
     g_sys_core = new ARM_Interpreter();
 
     switch (Settings::values.cpu_core) {
-        case CPU_FastInterpreter:
+        case CPU_Interpreter:
             g_app_core = new ARM_DynCom();
             break;
-        case CPU_Interpreter:
+        case CPU_OldInterpreter:
         default:
             g_app_core = new ARM_Interpreter();
             break;
diff --git a/src/core/core.h b/src/core/core.h
index 05dbe0ae5..ecd58a73a 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -13,7 +13,7 @@ namespace Core {
 
 enum CPUCore {
     CPU_Interpreter,
-    CPU_FastInterpreter
+    CPU_OldInterpreter,
 };
 
 extern ARM_Interface*   g_app_core;     ///< ARM11 application core
diff --git a/src/core/file_sys/archive_backend.h b/src/core/file_sys/archive_backend.h
index 1612c35c2..390178f67 100644
--- a/src/core/file_sys/archive_backend.h
+++ b/src/core/file_sys/archive_backend.h
@@ -88,6 +88,7 @@ public:
     const std::string DebugStr() const {
         switch (GetType()) {
         case Invalid:
+        default:
             return "[Invalid]";
         case Empty:
             return "[Empty]";
@@ -117,6 +118,7 @@ public:
             return {};
         case Invalid:
         case Binary:
+        default:
             // TODO(yuriks): Add assert
             LOG_ERROR(Service_FS, "LowPathType cannot be converted to string!");
             return {};
@@ -159,6 +161,7 @@ public:
         case Empty:
             return {};
         case Invalid:
+        default:
             // TODO(yuriks): Add assert
             LOG_ERROR(Service_FS, "LowPathType cannot be converted to binary!");
             return {};
diff --git a/src/core/file_sys/archive_romfs.cpp b/src/core/file_sys/archive_romfs.cpp
index 2fc3831b7..a30f73d0e 100644
--- a/src/core/file_sys/archive_romfs.cpp
+++ b/src/core/file_sys/archive_romfs.cpp
@@ -5,11 +5,10 @@
 #include <memory>
 
 #include "common/common_types.h"
+#include "common/file_util.h"
 #include "common/make_unique.h"
 
 #include "core/file_sys/archive_romfs.h"
-#include "core/file_sys/directory_romfs.h"
-#include "core/file_sys/file_romfs.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // FileSys namespace
@@ -23,48 +22,4 @@ Archive_RomFS::Archive_RomFS(const Loader::AppLoader& app_loader) {
     }
 }
 
-std::unique_ptr<FileBackend> Archive_RomFS::OpenFile(const Path& path, const Mode mode) const {
-    return Common::make_unique<File_RomFS>(this);
-}
-
-bool Archive_RomFS::DeleteFile(const Path& path) const {
-    LOG_WARNING(Service_FS, "Attempted to delete a file from ROMFS.");
-    return false;
-}
-
-bool Archive_RomFS::RenameFile(const Path& src_path, const Path& dest_path) const {
-    LOG_WARNING(Service_FS, "Attempted to rename a file within ROMFS.");
-    return false;
-}
-
-bool Archive_RomFS::DeleteDirectory(const Path& path) const {
-    LOG_WARNING(Service_FS, "Attempted to delete a directory from ROMFS.");
-    return false;
-}
-
-ResultCode Archive_RomFS::CreateFile(const Path& path, u32 size) const {
-    LOG_WARNING(Service_FS, "Attempted to create a file in ROMFS.");
-    // TODO: Verify error code
-    return ResultCode(ErrorDescription::NotAuthorized, ErrorModule::FS, ErrorSummary::NotSupported, ErrorLevel::Permanent);
-}
-
-bool Archive_RomFS::CreateDirectory(const Path& path) const {
-    LOG_WARNING(Service_FS, "Attempted to create a directory in ROMFS.");
-    return false;
-}
-
-bool Archive_RomFS::RenameDirectory(const Path& src_path, const Path& dest_path) const {
-    LOG_WARNING(Service_FS, "Attempted to rename a file within ROMFS.");
-    return false;
-}
-
-std::unique_ptr<DirectoryBackend> Archive_RomFS::OpenDirectory(const Path& path) const {
-    return Common::make_unique<Directory_RomFS>();
-}
-
-ResultCode Archive_RomFS::Format(const Path& path) const {
-    LOG_WARNING(Service_FS, "Attempted to format ROMFS.");
-    return UnimplementedFunction(ErrorModule::FS);
-}
-
 } // namespace FileSys
diff --git a/src/core/file_sys/archive_romfs.h b/src/core/file_sys/archive_romfs.h
index d4b1eb7f2..5cb75e04d 100644
--- a/src/core/file_sys/archive_romfs.h
+++ b/src/core/file_sys/archive_romfs.h
@@ -8,7 +8,7 @@
 
 #include "common/common_types.h"
 
-#include "core/file_sys/archive_backend.h"
+#include "core/file_sys/ivfc_archive.h"
 #include "core/loader/loader.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -17,82 +17,12 @@
 namespace FileSys {
 
 /// File system interface to the RomFS archive
-class Archive_RomFS final : public ArchiveBackend {
+class Archive_RomFS final : public IVFCArchive {
 public:
     Archive_RomFS(const Loader::AppLoader& app_loader);
 
     std::string GetName() const override { return "RomFS"; }
-
-    /**
-     * Open a file specified by its path, using the specified mode
-     * @param path Path relative to the archive
-     * @param mode Mode to open the file with
-     * @return Opened file, or nullptr
-     */
-    std::unique_ptr<FileBackend> OpenFile(const Path& path, const Mode mode) const override;
-
-    /**
-     * Delete a file specified by its path
-     * @param path Path relative to the archive
-     * @return Whether the file could be deleted
-     */
-    bool DeleteFile(const Path& path) const override;
-
-    /**
-     * Rename a File specified by its path
-     * @param src_path Source path relative to the archive
-     * @param dest_path Destination path relative to the archive
-     * @return Whether rename succeeded
-     */
-    bool RenameFile(const Path& src_path, const Path& dest_path) const override;
-
-    /**
-     * Delete a directory specified by its path
-     * @param path Path relative to the archive
-     * @return Whether the directory could be deleted
-     */
-    bool DeleteDirectory(const Path& path) const override;
-
-    /**
-     * Create a file specified by its path
-     * @param path Path relative to the Archive
-     * @param size The size of the new file, filled with zeroes
-     * @return File creation result code
-     */
-    ResultCode CreateFile(const Path& path, u32 size) const override;
-
-    /**
-     * Create a directory specified by its path
-     * @param path Path relative to the archive
-     * @return Whether the directory could be created
-     */
-    bool CreateDirectory(const Path& path) const override;
-
-    /**
-     * Rename a Directory specified by its path
-     * @param src_path Source path relative to the archive
-     * @param dest_path Destination path relative to the archive
-     * @return Whether rename succeeded
-     */
-    bool RenameDirectory(const Path& src_path, const Path& dest_path) const override;
-
-    /**
-     * Open a directory specified by its path
-     * @param path Path relative to the archive
-     * @return Opened directory, or nullptr
-     */
-    std::unique_ptr<DirectoryBackend> OpenDirectory(const Path& path) const override;
-
-    ResultCode Open(const Path& path) override {
-        return RESULT_SUCCESS;
-    }
-
-    ResultCode Format(const Path& path) const override;
-
-private:
-    friend class File_RomFS;
-
-    std::vector<u8> raw_data;
+    ResultCode Open(const Path& path) override { return RESULT_SUCCESS; }
 };
 
 } // namespace FileSys
diff --git a/src/core/file_sys/archive_savedatacheck.cpp b/src/core/file_sys/archive_savedatacheck.cpp
new file mode 100644
index 000000000..233158a0c
--- /dev/null
+++ b/src/core/file_sys/archive_savedatacheck.cpp
@@ -0,0 +1,41 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/file_util.h"
+
+#include "core/file_sys/archive_savedatacheck.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// FileSys namespace
+
+namespace FileSys {
+
+Archive_SaveDataCheck::Archive_SaveDataCheck(const std::string& mount_loc) : mount_point(mount_loc) {
+}
+
+ResultCode Archive_SaveDataCheck::Open(const Path& path) {
+    // Loads "<mount_point><word1><word0>.bin" (hex words taken from the binary path) into raw_data.
+    // TODO(Subv): We should not be overwriting raw_data every time this function is called, but until we
+    // use factory classes to create archives at runtime (instead of creating them beforehand) and allow
+    // multiple archives of the same type to be open at once without clobbering each other, we cannot keep
+    // per-archive state, hence we overwrite it every time it's needed. This has problems: e.g. opening a
+    // file in this archive and then reopening the archive with a different path corrupts the open file.
+    auto vec = path.AsBinary();
+    std::fill(raw_data.begin(), raw_data.end(), 0);
+    if (vec.size() < 2 * sizeof(u32))
+        return ResultCode(-1); // Path too short for the two words read below (e.g. an empty binary path)
+    const u32* data = reinterpret_cast<u32*>(vec.data());
+    std::string file_path = Common::StringFromFormat("%s%08x%08x.bin", mount_point.c_str(), data[1], data[0]);
+    FileUtil::IOFile file(file_path, "rb");
+    if (!file.IsOpen()) {
+        return ResultCode(-1); // TODO(Subv): Find the right error code
+    }
+    auto size = file.GetSize();
+    raw_data.resize(size);
+    file.ReadBytes(raw_data.data(), size);
+    file.Close();
+    return RESULT_SUCCESS;
+}
+
+} // namespace FileSys
diff --git a/src/core/file_sys/archive_savedatacheck.h b/src/core/file_sys/archive_savedatacheck.h
new file mode 100644
index 000000000..f6e73e803
--- /dev/null
+++ b/src/core/file_sys/archive_savedatacheck.h
@@ -0,0 +1,31 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+#include "core/file_sys/ivfc_archive.h"
+#include "core/loader/loader.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// FileSys namespace
+
+namespace FileSys {
+
+/// File system interface to the SaveDataCheck archive
+class Archive_SaveDataCheck final : public IVFCArchive {
+public:
+    Archive_SaveDataCheck(const std::string& mount_point); ///< Stores mount_point for later Open() calls
+
+    std::string GetName() const override { return "SaveDataCheck"; }
+    ResultCode Open(const Path& path) override; ///< Loads the .bin file selected by path into raw_data
+
+private:
+    std::string mount_point; ///< Prefix that Open() prepends to the "%08x%08x.bin" file name
+};
+
+} // namespace FileSys
diff --git a/src/core/file_sys/directory_romfs.cpp b/src/core/file_sys/directory_romfs.cpp
deleted file mode 100644
index e130aca17..000000000
--- a/src/core/file_sys/directory_romfs.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/common_types.h"
-
-#include "core/file_sys/directory_romfs.h"
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// FileSys namespace
-
-namespace FileSys {
-
-Directory_RomFS::Directory_RomFS() {
-}
-
-Directory_RomFS::~Directory_RomFS() {
-}
-
-bool Directory_RomFS::Open() {
-    return false;
-}
-
-u32 Directory_RomFS::Read(const u32 count, Entry* entries) {
-    return 0;
-}
-
-bool Directory_RomFS::Close() const {
-    return false;
-}
-
-} // namespace FileSys
diff --git a/src/core/file_sys/directory_romfs.h b/src/core/file_sys/directory_romfs.h
deleted file mode 100644
index 2297f1645..000000000
--- a/src/core/file_sys/directory_romfs.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-#include "core/file_sys/directory_backend.h"
-#include "core/loader/loader.h"
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// FileSys namespace
-
-namespace FileSys {
-
-class Directory_RomFS final : public DirectoryBackend {
-public:
-    Directory_RomFS();
-    ~Directory_RomFS() override;
-
-    /**
-    * Open the directory
-    * @return true if the directory opened correctly
-    */
-    bool Open() override;
-
-    /**
-     * List files contained in the directory
-     * @param count Number of entries to return at once in entries
-     * @param entries Buffer to read data into
-     * @return Number of entries listed
-     */
-    u32 Read(const u32 count, Entry* entries) override;
-
-    /**
-     * Close the directory
-     * @return true if the directory closed correctly
-     */
-    bool Close() const override;
-};
-
-} // namespace FileSys
diff --git a/src/core/file_sys/disk_archive.cpp b/src/core/file_sys/disk_archive.cpp
index 0197f727d..c6e033fcd 100644
--- a/src/core/file_sys/disk_archive.cpp
+++ b/src/core/file_sys/disk_archive.cpp
@@ -6,6 +6,7 @@
 
 #include "common/common_types.h"
 #include "common/file_util.h"
+#include "common/make_unique.h"
 
 #include "core/file_sys/disk_archive.h"
 #include "core/settings.h"
@@ -17,10 +18,10 @@ namespace FileSys {
 
 std::unique_ptr<FileBackend> DiskArchive::OpenFile(const Path& path, const Mode mode) const {
     LOG_DEBUG(Service_FS, "called path=%s mode=%01X", path.DebugStr().c_str(), mode.hex);
-    DiskFile* file = new DiskFile(this, path, mode);
+    auto file = Common::make_unique<DiskFile>(this, path, mode);
     if (!file->Open())
         return nullptr;
-    return std::unique_ptr<FileBackend>(file);
+    return std::move(file);
 }
 
 bool DiskArchive::DeleteFile(const Path& path) const {
@@ -66,10 +67,10 @@ bool DiskArchive::RenameDirectory(const Path& src_path, const Path& dest_path) c
 
 std::unique_ptr<DirectoryBackend> DiskArchive::OpenDirectory(const Path& path) const {
     LOG_DEBUG(Service_FS, "called path=%s", path.DebugStr().c_str());
-    DiskDirectory* directory = new DiskDirectory(this, path);
+    auto directory = Common::make_unique<DiskDirectory>(this, path);
     if (!directory->Open())
         return nullptr;
-    return std::unique_ptr<DirectoryBackend>(directory);
+    return std::move(directory);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -100,7 +101,7 @@ bool DiskFile::Open() {
     // Open the file in binary mode, to avoid problems with CR/LF on Windows systems
     mode_string += "b";
 
-    file = new FileUtil::IOFile(path, mode_string.c_str());
+    file = Common::make_unique<FileUtil::IOFile>(path, mode_string.c_str());
     return true;
 }
 
diff --git a/src/core/file_sys/disk_archive.h b/src/core/file_sys/disk_archive.h
index f18d96f5a..3472f6874 100644
--- a/src/core/file_sys/disk_archive.h
+++ b/src/core/file_sys/disk_archive.h
@@ -56,10 +56,6 @@ public:
     DiskFile();
     DiskFile(const DiskArchive* archive, const Path& path, const Mode mode);
 
-    ~DiskFile() override {
-        Close();
-    }
-
     bool Open() override;
     size_t Read(const u64 offset, const u32 length, u8* buffer) const override;
     size_t Write(const u64 offset, const u32 length, const u32 flush, const u8* buffer) const override;
@@ -75,7 +71,7 @@ protected:
     const DiskArchive* archive;
     std::string path;
     Mode mode;
-    FileUtil::IOFile* file;
+    std::unique_ptr<FileUtil::IOFile> file;
 };
 
 class DiskDirectory : public DirectoryBackend {
diff --git a/src/core/file_sys/file_romfs.cpp b/src/core/file_sys/file_romfs.cpp
deleted file mode 100644
index 7467d6d31..000000000
--- a/src/core/file_sys/file_romfs.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/common_types.h"
-
-#include "core/file_sys/file_romfs.h"
-#include "core/file_sys/archive_romfs.h"
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// FileSys namespace
-
-namespace FileSys {
-
-bool File_RomFS::Open() {
-    return true;
-}
-
-size_t File_RomFS::Read(const u64 offset, const u32 length, u8* buffer) const {
-    LOG_TRACE(Service_FS, "called offset=%llu, length=%d", offset, length);
-    memcpy(buffer, &archive->raw_data[(u32)offset], length);
-    return length;
-}
-
-size_t File_RomFS::Write(const u64 offset, const u32 length, const u32 flush, const u8* buffer) const {
-    LOG_WARNING(Service_FS, "Attempted to write to ROMFS.");
-    return 0;
-}
-
-size_t File_RomFS::GetSize() const {
-    return sizeof(u8) * archive->raw_data.size();
-}
-
-bool File_RomFS::SetSize(const u64 size) const {
-    LOG_WARNING(Service_FS, "Attempted to set the size of ROMFS");
-    return false;
-}
-
-bool File_RomFS::Close() const {
-    return false;
-}
-
-} // namespace FileSys
diff --git a/src/core/file_sys/file_romfs.h b/src/core/file_sys/file_romfs.h
deleted file mode 100644
index 04d8a16a2..000000000
--- a/src/core/file_sys/file_romfs.h
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-#include "core/file_sys/file_backend.h"
-#include "core/loader/loader.h"
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// FileSys namespace
-
-namespace FileSys {
-
-class Archive_RomFS;
-
-class File_RomFS final : public FileBackend {
-public:
-    File_RomFS(const Archive_RomFS* archive) : archive(archive) {}
-
-    /**
-     * Open the file
-     * @return true if the file opened correctly
-     */
-    bool Open() override;
-
-    /**
-     * Read data from the file
-     * @param offset Offset in bytes to start reading data from
-     * @param length Length in bytes of data to read from file
-     * @param buffer Buffer to read data into
-     * @return Number of bytes read
-     */
-    size_t Read(const u64 offset, const u32 length, u8* buffer) const override;
-
-    /**
-     * Write data to the file
-     * @param offset Offset in bytes to start writing data to
-     * @param length Length in bytes of data to write to file
-     * @param flush The flush parameters (0 == do not flush)
-     * @param buffer Buffer to read data from
-     * @return Number of bytes written
-     */
-    size_t Write(const u64 offset, const u32 length, const u32 flush, const u8* buffer) const override;
-
-    /**
-     * Get the size of the file in bytes
-     * @return Size of the file in bytes
-     */
-    size_t GetSize() const override;
-
-    /**
-     * Set the size of the file in bytes
-     * @param size New size of the file
-     * @return true if successful
-     */
-    bool SetSize(const u64 size) const override;
-
-    /**
-     * Close the file
-     * @return true if the file closed correctly
-     */
-    bool Close() const override;
-
-    void Flush() const override { }
-
-private:
-    const Archive_RomFS* archive;
-};
-
-} // namespace FileSys
diff --git a/src/core/file_sys/ivfc_archive.cpp b/src/core/file_sys/ivfc_archive.cpp
new file mode 100644
index 000000000..68c3c8b81
--- /dev/null
+++ b/src/core/file_sys/ivfc_archive.cpp
@@ -0,0 +1,88 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+
+#include "common/common_types.h"
+#include "common/file_util.h"
+#include "common/make_unique.h"
+
+#include "core/file_sys/ivfc_archive.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// FileSys namespace
+
+namespace FileSys {
+
+IVFCArchive::IVFCArchive() {
+} // nothing to set up: raw_data starts out as an empty vector
+
+std::unique_ptr<FileBackend> IVFCArchive::OpenFile(const Path& path, const Mode mode) const {
+    return Common::make_unique<IVFCFile>(this); // path and mode are ignored; the file is bound to this archive
+}
+
+bool IVFCArchive::DeleteFile(const Path& path) const {
+    LOG_CRITICAL(Service_FS, "Attempted to delete a file from an IVFC archive (%s).", GetName().c_str());
+    return false; // nothing is deleted; the attempt is only logged and path is unused
+}
+
+bool IVFCArchive::RenameFile(const Path& src_path, const Path& dest_path) const {
+    LOG_CRITICAL(Service_FS, "Attempted to rename a file within an IVFC archive (%s).", GetName().c_str());
+    return false; // nothing is renamed; the attempt is only logged and both paths are unused
+}
+
+bool IVFCArchive::DeleteDirectory(const Path& path) const {
+    LOG_CRITICAL(Service_FS, "Attempted to delete a directory from an IVFC archive (%s).", GetName().c_str());
+    return false; // nothing is deleted; the attempt is only logged and path is unused
+}
+
+ResultCode IVFCArchive::CreateFile(const Path& path, u32 size) const {
+    LOG_CRITICAL(Service_FS, "Attempted to create a file in an IVFC archive (%s).", GetName().c_str());
+    // No file is created (path and size are unused); a fixed error is returned. TODO: Verify error code
+    return ResultCode(ErrorDescription::NotAuthorized, ErrorModule::FS, ErrorSummary::NotSupported, ErrorLevel::Permanent);
+}
+
+bool IVFCArchive::CreateDirectory(const Path& path) const {
+    LOG_CRITICAL(Service_FS, "Attempted to create a directory in an IVFC archive (%s).", GetName().c_str());
+    return false; // nothing is created; the attempt is only logged and path is unused
+}
+
+bool IVFCArchive::RenameDirectory(const Path& src_path, const Path& dest_path) const {
+    LOG_CRITICAL(Service_FS, "Attempted to rename a directory within an IVFC archive (%s).", GetName().c_str());
+    return false; // nothing is renamed; the attempt is only logged and both paths are unused
+}
+
+std::unique_ptr<DirectoryBackend> IVFCArchive::OpenDirectory(const Path& path) const {
+    return Common::make_unique<IVFCDirectory>(); // path is ignored; a fresh IVFCDirectory is returned every time
+}
+
+ResultCode IVFCArchive::Format(const Path& path) const {
+    LOG_CRITICAL(Service_FS, "Attempted to format an IVFC archive (%s).", GetName().c_str());
+    // Nothing is formatted (path is unused); a fixed error is returned. TODO: Verify error code
+    return ResultCode(ErrorDescription::NotAuthorized, ErrorModule::FS, ErrorSummary::NotSupported, ErrorLevel::Permanent);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+size_t IVFCFile::Read(const u64 offset, const u32 length, u8* buffer) const {
+    LOG_TRACE(Service_FS, "called offset=%llu, length=%d", offset, length);
+    const u64 size = archive->raw_data.size(), count = offset < size ? (length < size - offset ? length : size - offset) : 0;
+    if (count != 0) memcpy(buffer, archive->raw_data.data() + offset, static_cast<size_t>(count)); // clamped to raw_data
+    return static_cast<size_t>(count); // bytes actually copied; 0 when offset is at or past the end
+}
+
+size_t IVFCFile::Write(const u64 offset, const u32 length, const u32 flush, const u8* buffer) const {
+    LOG_CRITICAL(Service_FS, "Attempted to write to IVFC file in archive %s.", archive->GetName().c_str());
+    return 0; // nothing is written, so zero bytes are reported; all arguments are unused
+}
+
+size_t IVFCFile::GetSize() const {
+    return sizeof(u8) * archive->raw_data.size(); // byte size of the owning archive's entire raw_data buffer
+}
+
+bool IVFCFile::SetSize(const u64 size) const {
+    LOG_CRITICAL(Service_FS, "Attempted to set the size of an IVFC file in archive %s", archive->GetName().c_str());
+    return false; // the size is never changed; the attempt is only logged and size is unused
+}
+
+} // namespace FileSys
diff --git a/src/core/file_sys/ivfc_archive.h b/src/core/file_sys/ivfc_archive.h
new file mode 100644
index 000000000..6f4cc86df
--- /dev/null
+++ b/src/core/file_sys/ivfc_archive.h
@@ -0,0 +1,66 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+#include "core/file_sys/archive_backend.h"
+#include "core/loader/loader.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// FileSys namespace
+
+namespace FileSys {
+
+/**
+ * Helper which implements an interface to deal with IVFC images used in some archives
+ * This should be subclassed by concrete archive types, which will provide the
+ * input data (load the raw IVFC archive) and override any required methods
+ */
+class IVFCArchive : public ArchiveBackend {
+public:
+    IVFCArchive();
+
+    std::unique_ptr<FileBackend> OpenFile(const Path& path, const Mode mode) const override; ///< Always yields an IVFCFile
+    bool DeleteFile(const Path& path) const override; ///< Logs the attempt and returns false
+    bool RenameFile(const Path& src_path, const Path& dest_path) const override; ///< Logs the attempt and returns false
+    bool DeleteDirectory(const Path& path) const override; ///< Logs the attempt and returns false
+    ResultCode CreateFile(const Path& path, u32 size) const override; ///< Logs the attempt and returns an error code
+    bool CreateDirectory(const Path& path) const override; ///< Logs the attempt and returns false
+    bool RenameDirectory(const Path& src_path, const Path& dest_path) const override; ///< Logs the attempt and returns false
+    std::unique_ptr<DirectoryBackend> OpenDirectory(const Path& path) const override; ///< Always yields an IVFCDirectory
+    ResultCode Format(const Path& path) const override; ///< Logs the attempt and returns an error code
+
+protected:
+    friend class IVFCFile; // IVFCFile reads raw_data directly
+    std::vector<u8> raw_data; ///< Raw archive bytes, filled in by the concrete subclass
+};
+
+class IVFCFile : public FileBackend {
+public:
+    IVFCFile(const IVFCArchive* archive) : archive(archive) {}
+
+    bool Open() override { return true; } ///< Nothing to open; always reports success
+    size_t Read(const u64 offset, const u32 length, u8* buffer) const override; ///< Copies bytes out of archive->raw_data
+    size_t Write(const u64 offset, const u32 length, const u32 flush, const u8* buffer) const override; ///< Logs and returns 0
+    size_t GetSize() const override; ///< Size of archive->raw_data in bytes
+    bool SetSize(const u64 size) const override; ///< Logs and returns false
+    bool Close() const override { return false; } ///< Does nothing and always returns false
+    void Flush() const override { } ///< No-op
+
+private:
+    const IVFCArchive* archive; ///< Archive whose raw_data this file exposes; never freed by this class
+};
+
+class IVFCDirectory : public DirectoryBackend {
+public:
+    bool Open() override { return false; } ///< Stub: always returns false
+    u32 Read(const u32 count, Entry* entries) override { return 0; } ///< Stub: writes no entries and returns 0
+    bool Close() const override { return false; } ///< Stub: always returns false
+};
+
+} // namespace FileSys
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 38705e3cd..736bbc36a 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -51,6 +51,17 @@ ResultCode ArbitrateAddress(Handle handle, ArbitrationType type, u32 address, s3
             HLE::Reschedule(__func__);
         }
         break;
+    
+    case ArbitrationType::DecrementAndWaitIfLessThan:
+    {
+        s32 memory_value = Memory::Read32(address) - 1;
+        Memory::Write32(address, memory_value);
+        if (memory_value <= value) {
+            Kernel::WaitCurrentThread(WAITTYPE_ARB, handle, address);
+            HLE::Reschedule(__func__);
+        }
+        break;
+    }
 
     default:
         LOG_ERROR(Kernel, "unknown type=%d", type);
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 558068c79..3dfeffc9b 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -168,9 +168,9 @@ Handle CreateMutex(bool initial_locked, const std::string& name) {
 ResultVal<bool> Mutex::WaitSynchronization() {
     bool wait = locked;
     if (locked) {
+        waiting_threads.push_back(GetCurrentThreadHandle());
         Kernel::WaitCurrentThread(WAITTYPE_MUTEX, GetHandle());
-    }
-    else {
+    } else {
         // Lock the mutex when the first thread accesses it
         locked = true;
         MutexAcquireLock(this);
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp
index 2cf4d118f..d4affdfbf 100644
--- a/src/core/hle/service/dsp_dsp.cpp
+++ b/src/core/hle/service/dsp_dsp.cpp
@@ -12,9 +12,23 @@
 
 namespace DSP_DSP {
 
-static u32 read_pipe_count;
-static Handle semaphore_event;
-static Handle interrupt_event;
+static u32 read_pipe_count    = 0;
+static Handle semaphore_event = 0;
+static Handle interrupt_event = 0;
+
+void SignalInterrupt() {
+    // TODO(bunnei): Stub. All this does is tell the emulated application that some DSP interrupt
+    // fired, without saying which one. The DSP is not emulated yet (and how it works is largely
+    // unknown), so this is a workaround that lets games which check the DSP interrupt signal event
+    // run. The different types of DSP interrupts still need to be figured out and triggered at
+    // the appropriate times.
+
+    if (interrupt_event != 0) {
+        Kernel::SignalEvent(interrupt_event);
+        return;
+    }
+    LOG_WARNING(Service_DSP, "cannot signal interrupt until DSP event has been created!");
+}
 
 /**
  * DSP_DSP::ConvertProcessAddressFromDspDram service function
@@ -102,7 +116,7 @@ void RegisterInterruptEvents(Service::Interface* self) {
 void WriteReg0x10(Service::Interface* self) {
     u32* cmd_buff = Kernel::GetCommandBuffer();
 
-    Kernel::SignalEvent(interrupt_event);
+    SignalInterrupt();
 
     cmd_buff[1] = 0; // No error
 
diff --git a/src/core/hle/service/dsp_dsp.h b/src/core/hle/service/dsp_dsp.h
index 0b8b64600..fa13bfb7c 100644
--- a/src/core/hle/service/dsp_dsp.h
+++ b/src/core/hle/service/dsp_dsp.h
@@ -20,4 +20,7 @@ public:
     }
 };
 
+/// Signals that a DSP interrupt has occurred to userland code
+void SignalInterrupt();
+
 } // namespace
diff --git a/src/core/hle/service/fs/archive.cpp b/src/core/hle/service/fs/archive.cpp
index f19ca3a9f..f761c6ab9 100644
--- a/src/core/hle/service/fs/archive.cpp
+++ b/src/core/hle/service/fs/archive.cpp
@@ -10,9 +10,11 @@
 #include "common/make_unique.h"
 #include "common/math_util.h"
 
-#include "core/file_sys/archive_savedata.h"
-#include "core/file_sys/archive_extsavedata.h"
 #include "core/file_sys/archive_backend.h"
+#include "core/file_sys/archive_extsavedata.h"
+#include "core/file_sys/archive_romfs.h"
+#include "core/file_sys/archive_savedata.h"
+#include "core/file_sys/archive_savedatacheck.h"
 #include "core/file_sys/archive_sdmc.h"
 #include "core/file_sys/directory_backend.h"
 #include "core/hle/service/fs/archive.h"
@@ -50,6 +52,9 @@ enum class FileCommand : u32 {
     SetAttributes   = 0x08070040,
     Close           = 0x08080000,
     Flush           = 0x08090000,
+    SetPriority     = 0x080A0040,
+    GetPriority     = 0x080B0000,
+    OpenLinkFile    = 0x080C0000,
 };
 
 // Command to access directory
@@ -63,7 +68,7 @@ enum class DirectoryCommand : u32 {
 class Archive {
 public:
     Archive(std::unique_ptr<FileSys::ArchiveBackend>&& backend, ArchiveIdCode id_code)
-            : backend(std::move(backend)), id_code(id_code) {
+            : id_code(id_code), backend(std::move(backend)) {
     }
 
     std::string GetName() const { return "Archive: " + backend->GetName(); }
@@ -75,12 +80,13 @@ public:
 class File : public Kernel::Session {
 public:
     File(std::unique_ptr<FileSys::FileBackend>&& backend, const FileSys::Path& path)
-            : backend(std::move(backend)), path(path) {
+            : path(path), priority(0), backend(std::move(backend)) { // same order as the member declarations
     }
 
     std::string GetName() const override { return "Path: " + path.DebugStr(); }
 
     FileSys::Path path; ///< Path of the file
+    u32 priority; ///< Priority of the file. TODO(Subv): Find out what this means
     std::unique_ptr<FileSys::FileBackend> backend; ///< File backend interface
 
     ResultVal<bool> SyncRequest() override {
@@ -145,6 +151,27 @@ public:
             break;
         }
 
+        case FileCommand::OpenLinkFile:
+        {
+            LOG_WARNING(Service_FS, "(STUBBED) File command OpenLinkFile %s", GetName().c_str());
+            cmd_buff[3] = GetHandle();
+            break;
+        }
+
+        case FileCommand::SetPriority:
+        {
+            priority = cmd_buff[1];
+            LOG_TRACE(Service_FS, "SetPriority %u", priority);
+            break;
+        }
+
+        case FileCommand::GetPriority:
+        {
+            cmd_buff[2] = priority;
+            LOG_TRACE(Service_FS, "GetPriority");
+            break;
+        }
+
         // Unknown command...
         default:
             LOG_ERROR(Service_FS, "Unknown command=0x%08X!", cmd);
@@ -160,7 +187,7 @@ public:
 class Directory : public Kernel::Session {
 public:
     Directory(std::unique_ptr<FileSys::DirectoryBackend>&& backend, const FileSys::Path& path)
-            : backend(std::move(backend)), path(path) {
+            : path(path), backend(std::move(backend)) {
     }
 
     std::string GetName() const override { return "Directory: " + path.DebugStr(); }
@@ -327,7 +354,7 @@ ResultCode DeleteDirectoryFromArchive(ArchiveHandle archive_handle, const FileSy
                       ErrorSummary::Canceled, ErrorLevel::Status);
 }
 
-ResultCode CreateFileInArchive(Handle archive_handle, const FileSys::Path& path, u32 file_size) {
+ResultCode CreateFileInArchive(ArchiveHandle archive_handle, const FileSys::Path& path, u32 file_size) {
     Archive* archive = GetArchive(archive_handle);
     if (archive == nullptr)
         return InvalidHandle(ErrorModule::FS);
@@ -435,6 +462,11 @@ void ArchiveInit() {
     else
         LOG_ERROR(Service_FS, "Can't instantiate SharedExtSaveData archive with path %s", 
                   sharedextsavedata_directory.c_str());
+
+    // Create the SaveDataCheck archive, basically a small variation of the RomFS archive
+    std::string savedatacheck_directory = FileUtil::GetUserPath(D_SAVEDATACHECK_IDX);
+    auto savedatacheck_archive = Common::make_unique<FileSys::Archive_SaveDataCheck>(savedatacheck_directory);
+    CreateArchive(std::move(savedatacheck_archive), ArchiveIdCode::SaveDataCheck);
 }
 
 /// Shutdown archives
diff --git a/src/core/hle/service/fs/archive.h b/src/core/hle/service/fs/archive.h
index c23b8cc46..9e9efa019 100644
--- a/src/core/hle/service/fs/archive.h
+++ b/src/core/hle/service/fs/archive.h
@@ -22,6 +22,7 @@ enum class ArchiveIdCode : u32 {
     SystemSaveData      = 0x00000008,
     SDMC                = 0x00000009,
     SDMCWriteOnly       = 0x0000000A,
+    SaveDataCheck       = 0x2345678A,
 };
 
 typedef u64 ArchiveHandle;
@@ -90,7 +91,7 @@ ResultCode DeleteDirectoryFromArchive(ArchiveHandle archive_handle, const FileSy
  * @param file_size The size of the new file, filled with zeroes
  * @return File creation result code
  */
-ResultCode CreateFileInArchive(Handle archive_handle, const FileSys::Path& path, u32 file_size);
+ResultCode CreateFileInArchive(ArchiveHandle archive_handle, const FileSys::Path& path, u32 file_size);
 
 /**
  * Create a Directory from an Archive
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 0f3cc2aa8..c5233e687 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -37,6 +37,7 @@
 #include "core/hle/service/soc_u.h"
 #include "core/hle/service/srv.h"
 #include "core/hle/service/ssl_c.h"
+#include "core/hle/service/y2r_u.h"
 
 namespace Service {
 
@@ -122,6 +123,7 @@ void Init() {
     g_manager->AddService(new PTM_U::Interface);
     g_manager->AddService(new SOC_U::Interface);
     g_manager->AddService(new SSL_C::Interface);
+    g_manager->AddService(new Y2R_U::Interface);
 
     LOG_DEBUG(Service, "initialized OK");
 }
diff --git a/src/core/hle/service/soc_u.cpp b/src/core/hle/service/soc_u.cpp
index 9fbf18b26..f502c6afe 100644
--- a/src/core/hle/service/soc_u.cpp
+++ b/src/core/hle/service/soc_u.cpp
@@ -308,11 +308,11 @@ static void Socket(Service::Interface* self) {
 
     u32 socket_handle = static_cast<u32>(::socket(domain, type, protocol));
 
-    if (socket_handle != SOCKET_ERROR_VALUE)
+    if ((s32)socket_handle != SOCKET_ERROR_VALUE)
         open_sockets[socket_handle] = { socket_handle, true };
 
     int result = 0;
-    if (socket_handle == SOCKET_ERROR_VALUE)
+    if ((s32)socket_handle == SOCKET_ERROR_VALUE)
         result = TranslateError(GET_ERRNO);
 
     cmd_buffer[1] = result;
@@ -404,7 +404,7 @@ static void Fcntl(Service::Interface* self) {
         }
 #endif
     } else {
-        LOG_ERROR(Service_SOC, "Unsupported command (%d) in fcntl call");
+        LOG_ERROR(Service_SOC, "Unsupported command (%d) in fcntl call", ctr_cmd);
         result = TranslateError(EINVAL); // TODO: Find the correct error
         posix_ret = -1;
         return;
@@ -436,11 +436,11 @@ static void Accept(Service::Interface* self) {
     socklen_t addr_len = sizeof(addr);
     u32 ret = static_cast<u32>(::accept(socket_handle, &addr, &addr_len));
     
-    if (ret != SOCKET_ERROR_VALUE)
+    if ((s32)ret != SOCKET_ERROR_VALUE)
         open_sockets[ret] = { ret, true };
 
     int result = 0;
-    if (ret == SOCKET_ERROR_VALUE) {
+    if ((s32)ret == SOCKET_ERROR_VALUE) {
         result = TranslateError(GET_ERRNO);
     } else {
         CTRSockAddr ctr_addr = CTRSockAddr::FromPlatform(addr);
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
new file mode 100644
index 000000000..f9e3619dd
--- /dev/null
+++ b/src/core/hle/service/y2r_u.cpp
@@ -0,0 +1,45 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/log.h"
+#include "core/hle/hle.h"
+#include "core/hle/kernel/event.h"
+#include "core/hle/service/y2r_u.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Namespace Y2R_U
+
+namespace Y2R_U {
+
+const Interface::FunctionInfo FunctionTable[] = { // NOTE(review): every second field is nullptr - presumably no handler is implemented yet; confirm
+    {0x00010040, nullptr,                 "SetInputFormat"},
+    {0x00030040, nullptr,                 "SetOutputFormat"},
+    {0x00050040, nullptr,                 "SetRotation"},
+    {0x00070040, nullptr,                 "SetBlockAlignment"},
+    {0x000D0040, nullptr,                 "SetTransferEndInterrupt"},
+    {0x000F0000, nullptr,                 "GetTransferEndEvent"},
+    {0x00100102, nullptr,                 "SetSendingY"},
+    {0x00110102, nullptr,                 "SetSendingU"},
+    {0x00120102, nullptr,                 "SetSendingV"},
+    {0x00180102, nullptr,                 "SetReceiving"},
+    {0x001A0040, nullptr,                 "SetInputLineWidth"},
+    {0x001C0040, nullptr,                 "SetInputLines"},
+    {0x00200040, nullptr,                 "SetStandardCoefficient"},
+    {0x00220040, nullptr,                 "SetAlpha"},
+    {0x00260000, nullptr,                 "StartConversion"},
+    {0x00270000, nullptr,                 "StopConversion"},
+    {0x00280000, nullptr,                 "IsBusyConversion"},
+    {0x002A0000, nullptr,                 "PingProcess"},
+    {0x002B0000, nullptr,                 "DriverInitialize"},
+    {0x002C0000, nullptr,                 "DriverFinalize"}
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Interface class
+
+Interface::Interface() {
+    Register(FunctionTable, ARRAY_SIZE(FunctionTable)); // passes the whole table above, with its entry count, to Register()
+}
+    
+} // namespace
diff --git a/src/core/hle/service/y2r_u.h b/src/core/hle/service/y2r_u.h
new file mode 100644
index 000000000..171aecfd1
--- /dev/null
+++ b/src/core/hle/service/y2r_u.h
@@ -0,0 +1,23 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/service/service.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Namespace Y2R_U
+
+namespace Y2R_U {
+
+class Interface : public Service::Interface {
+public:
+    Interface(); ///< Registers the y2r:u FunctionTable (defined in y2r_u.cpp)
+
+    std::string GetPortName() const override {
+        return "y2r:u"; // fixed name returned for this service's port
+    }
+};
+
+} // namespace
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index dd619cb16..e346e0ad6 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -10,6 +10,7 @@
 
 #include "core/hle/hle.h"
 #include "core/hle/service/gsp_gpu.h"
+#include "core/hle/service/dsp_dsp.h"
 
 #include "core/hw/gpu.h"
 
@@ -94,11 +95,15 @@ inline void Write(u32 addr, const T data) {
                         int r, g, b, a;
                     } source_color = { 0, 0, 0, 0 };
 
+                    // Cheap emulation of horizontal scaling: Just skip each second pixel of the
+                    // input framebuffer. We keep track of this in the pixel_skip variable.
+                    unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1;
+
                     switch (config.input_format) {
                     case Regs::PixelFormat::RGBA8:
                     {
                         // TODO: Most likely got the component order messed up.
-                        u8* srcptr = source_pointer + x * 4 + y * config.input_width * 4;
+                        u8* srcptr = source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip;
                         source_color.r = srcptr[0]; // blue
                         source_color.g = srcptr[1]; // green
                         source_color.b = srcptr[2]; // red
@@ -210,13 +215,18 @@ void Update() {
             //  - If frameskip == 0 (disabled), always swap buffers
             //  - If frameskip == 1, swap buffers every other frame (starting from the first frame)
             //  - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame)
-
             if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) || 
                    Settings::values.frame_skip == 0) {
                 VideoCore::g_renderer->SwapBuffers();
             }
 
+            // Signal to GSP that GPU interrupt has occurred
             GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
+
+            // TODO(bunnei): Fake a DSP interrupt on each frame. This does not belong here, but
+            // until we can emulate DSP interrupts, this is probably the only reasonable place to do
+            // this. Certain games expect this to be periodically signaled.
+            DSP_DSP::SignalInterrupt();
         }
     }
 }
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 292f496c1..7de055232 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -157,6 +157,9 @@ struct Regs {
             BitField< 8, 3, PixelFormat> input_format;
             BitField<12, 3, PixelFormat> output_format;
             BitField<16, 1, u32> output_tiled;     // stores output in a tiled format
+
+            // TODO: Not really sure if this actually scales, or even resizes at all.
+            BitField<24, 1, u32> scale_horizontally;
         };
 
         INSERT_PADDING_WORDS(0x1);
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 354335014..3ca60c072 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -222,7 +222,7 @@ public:
     int GetSectionSize(SectionID section) const { return sections[section].sh_size; }
     SectionID GetSectionByName(const char *name, int firstSection = 0) const; //-1 for not found
 
-    bool DidRelocate() {
+    bool DidRelocate() const {
         return relocate;
     }
 };
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index 87580cb2a..45cf425df 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -45,6 +45,8 @@ FileType IdentifyFile(const std::string &filename) {
         return FileType::CCI;
     } else if (extension == ".bin") {
         return FileType::BIN;
+    } else if (extension == ".3ds") {
+        return FileType::CCI;
     } else if (extension == ".3dsx") {
         return FileType::THREEDSX;
     }
diff --git a/src/video_core/color.h b/src/video_core/color.h
new file mode 100644
index 000000000..e86ac1265
--- /dev/null
+++ b/src/video_core/color.h
@@ -0,0 +1,32 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Color {
+// NOTE: Inputs are expected to already fit the source bit width; nothing is masked here.
+/// Convert a 1-bit color component to 8 bit (0 -> 0, 1 -> 255)
+static inline u8 Convert1To8(u8 value) {
+    return value * 255;
+}
+
+/// Convert a 4-bit color component to 8 bit by copying the nibble into both halves (0xF -> 0xFF)
+static inline u8 Convert4To8(u8 value) {
+    return (value << 4) | value;
+}
+
+/// Convert a 5-bit color component to 8 bit; the top 3 bits are repeated in the low bits (0x1F -> 0xFF)
+static inline u8 Convert5To8(u8 value) {
+    return (value << 3) | (value >> 2);
+}
+
+/// Convert a 6-bit color component to 8 bit; the top 2 bits are repeated in the low bits (0x3F -> 0xFF)
+static inline u8 Convert6To8(u8 value) {
+    return (value << 2) | (value >> 4);
+}
+
+
+} // namespace Color
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 9602779f4..0d9f4ba66 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -112,6 +112,11 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
                 // Initialize data for the current vertex
                 VertexShader::InputVertex input;
 
+                // Preload the first attribute's w component with a debugging token so we can
+                // check below whether the running application overwrote it or not.
+                static const float24 debug_token = float24::FromRawFloat24(0x00abcdef);
+                input.attr[0].w = debug_token;
+
                 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
                     for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
                         const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]));
@@ -136,6 +141,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
                     }
                 }
 
+                // HACK: Some games do not initialize the vertex position's w component. This leads
+                //       to critical issues since it messes up perspective division. As a
+                //       workaround, we force the fourth component to 1.0 if we find this to be the
+                //       case.
+                //       To do this, we additionally have to assume that the first input attribute
+                //       is the vertex position, since there's no information about this other than
+                //       the empirical observation that this is usually the case.
+                if (input.attr[0].w == debug_token)
+                    input.attr[0].w = float24::FromFloat32(1.0);
+
                 if (g_debug_context)
                     g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
 
@@ -173,6 +188,19 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
 
             break;
 
+        case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1):
+        case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2):
+        case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3):
+        case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4):
+        {
+            int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1));
+            auto values = registers.vs_int_uniforms[index];
+            VertexShader::GetIntUniform(index) = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
+            LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x",
+                      index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value());
+            break;
+        }
+
         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1):
         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2):
         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3):
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 5921185a6..a494465b9 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -19,6 +19,7 @@
 #include "common/log.h"
 #include "common/file_util.h"
 
+#include "video_core/color.h"
 #include "video_core/math.h"
 #include "video_core/pica.h"
 
@@ -359,29 +360,26 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
         u8 g = ((source_ptr) >> 6) & 0x1F;
         u8 b = (source_ptr >> 1) & 0x1F;
         u8 a = source_ptr & 1;
-        return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), disable_alpha ? 255 : (a * 255));
+        return Math::MakeVec<u8>(Color::Convert5To8(r), Color::Convert5To8(g),
+                                 Color::Convert5To8(b), disable_alpha ? 255 : Color::Convert1To8(a));
     }
 
     case Regs::TextureFormat::RGB565:
     {
         const u16 source_ptr = *(const u16*)(source + offset * 2);
-        u8 r = (source_ptr >> 11) & 0x1F;
-        u8 g = ((source_ptr) >> 5) & 0x3F;
-        u8 b = (source_ptr) & 0x1F;
-        return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 2) | (g >> 4), (b << 3) | (b >> 2), 255);
+        u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F);
+        u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F);
+        u8 b = Color::Convert5To8((source_ptr) & 0x1F);
+        return Math::MakeVec<u8>(r, g, b, 255);
     }
 
     case Regs::TextureFormat::RGBA4:
     {
         const u8* source_ptr = source + offset * 2;
-        u8 r = source_ptr[1] >> 4;
-        u8 g = source_ptr[1] & 0xFF;
-        u8 b = source_ptr[0] >> 4;
-        u8 a = source_ptr[0] & 0xFF;
-        r = (r << 4) | r;
-        g = (g << 4) | g;
-        b = (b << 4) | b;
-        a = (a << 4) | a;
+        u8 r = Color::Convert4To8(source_ptr[1] >> 4);
+        u8 g = Color::Convert4To8(source_ptr[1] & 0xF);
+        u8 b = Color::Convert4To8(source_ptr[0] >> 4);
+        u8 a = Color::Convert4To8(source_ptr[0] & 0xF);
         return { r, g, b, disable_alpha ? (u8)255 : a };
     }
 
@@ -389,13 +387,11 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
     {
         const u8* source_ptr = source + offset * 2;
 
-        // TODO: component order not verified
-
         if (disable_alpha) {
             // Show intensity as red, alpha as green
-            return { source_ptr[0], source_ptr[1], 0, 255 };
+            return { source_ptr[1], source_ptr[0], 0, 255 };
         } else {
-            return { source_ptr[0], source_ptr[0], source_ptr[0], source_ptr[1]};
+            return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]};
         }
     }
 
@@ -418,14 +414,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
 
     case Regs::TextureFormat::IA4:
     {
-        const u8* source_ptr = source + offset / 2;
-
-        // TODO: component order not verified
+        const u8* source_ptr = source + offset;
 
-        u8 i = (*source_ptr) & 0xF;
-        u8 a = ((*source_ptr) & 0xF0) >> 4;
-        a |= a << 4;
-        i |= i << 4;
+        u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
+        u8 a = Color::Convert4To8((*source_ptr) & 0xF);
 
         if (disable_alpha) {
             // Show intensity as red, alpha as green
@@ -439,15 +431,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
     {
         const u8* source_ptr = source + offset / 2;
 
-        // TODO: component order not verified
-
         u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4);
-        a |= a << 4;
+        a = Color::Convert4To8(a);
 
         if (disable_alpha) {
-            return { *source_ptr, *source_ptr, *source_ptr, 255 };
+            return { a, a, a, 255 };
         } else {
-            return { 0, 0, 0, *source_ptr };
+            return { 0, 0, 0, a };
         }
     }
 
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 38bac748c..f5771ed84 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -50,7 +50,19 @@ struct Regs {
 
     u32 trigger_irq;
 
-    INSERT_PADDING_WORDS(0x30);
+    INSERT_PADDING_WORDS(0x2f);
+
+    enum class CullMode : u32 {
+        // Select which polygons are considered to be "frontfacing".
+        KeepAll              = 0, // the rasterizer performs no culling for this value
+        KeepClockWise        = 1,
+        KeepCounterClockWise = 2,
+        // TODO: The field is 2 bits wide; what does the remaining value (3) imply?
+    };
+
+    union {
+        BitField<0, 2, CullMode> cull_mode; // register 0x40, bits 0-1
+    };
 
     BitField<0, 24, u32> viewport_size_x;
 
@@ -289,7 +301,7 @@ struct Regs {
     TevStageConfig tev_stage4;
     INSERT_PADDING_WORDS(0x3);
     TevStageConfig tev_stage5;
-    INSERT_PADDING_WORDS(0x13);
+    INSERT_PADDING_WORDS(0x3);
 
     const std::array<Regs::TevStageConfig,6> GetTevStages() const {
         return { tev_stage0, tev_stage1,
@@ -298,6 +310,60 @@ struct Regs {
     };
 
     struct {
+        enum DepthFunc : u32 {
+            Always      = 1,
+            LessThan    = 4,
+            GreaterThan = 6,
+        };
+
+        union {
+            // If false, logic blending is used
+            BitField<8, 1, u32> alphablend_enable;
+        };
+
+        union {
+            enum BlendEquation : u32 {
+                Add = 0,
+            };
+
+            enum BlendFactor : u32 {
+                Zero = 0,
+                One = 1,
+
+                SourceAlpha = 6,
+                OneMinusSourceAlpha = 7,
+            };
+
+            BitField< 0, 8, BlendEquation> blend_equation_rgb;
+            BitField< 8, 8, BlendEquation> blend_equation_a;
+
+            BitField<16, 4, BlendFactor> factor_source_rgb;
+            BitField<20, 4, BlendFactor> factor_dest_rgb;
+
+            BitField<24, 4, BlendFactor> factor_source_a;
+            BitField<28, 4, BlendFactor> factor_dest_a;
+        } alpha_blending;
+
+        union {
+            enum Op {
+                Set = 4,
+            };
+
+            BitField<0, 4, Op> op;
+        } logic_op;
+
+        INSERT_PADDING_WORDS(0x4);
+
+        union {
+            BitField< 0, 1, u32> depth_test_enable;
+            BitField< 4, 3, DepthFunc> depth_test_func;
+            BitField<12, 1, u32> depth_write_enable;
+        };
+
+        INSERT_PADDING_WORDS(0x8);
+    } output_merger;
+
+    struct {
         enum ColorFormat : u32 {
             RGBA8    = 0,
             RGB8     = 1,
@@ -495,8 +561,14 @@ struct Regs {
     INSERT_PADDING_WORDS(0x51);
 
     BitField<0, 16, u32> vs_bool_uniforms;
+    union { // one integer uniform: four 8-bit components packed into a single register word
+        BitField< 0, 8, u32> x;
+        BitField< 8, 8, u32> y;
+        BitField<16, 8, u32> z;
+        BitField<24, 8, u32> w;
+    } vs_int_uniforms[4]; // registers 0x2b1-0x2b4
 
-    INSERT_PADDING_WORDS(0x9);
+    INSERT_PADDING_WORDS(0x5);
 
     // Offset to shader program entry point (in words)
     BitField<0, 16, u32> vs_main_offset;
@@ -599,6 +671,7 @@ struct Regs {
             } while(false)
 
         ADD_FIELD(trigger_irq);
+        ADD_FIELD(cull_mode);
         ADD_FIELD(viewport_size_x);
         ADD_FIELD(viewport_size_y);
         ADD_FIELD(viewport_depth_range);
@@ -617,6 +690,7 @@ struct Regs {
         ADD_FIELD(tev_stage3);
         ADD_FIELD(tev_stage4);
         ADD_FIELD(tev_stage5);
+        ADD_FIELD(output_merger);
         ADD_FIELD(framebuffer);
         ADD_FIELD(vertex_attributes);
         ADD_FIELD(index_array);
@@ -625,6 +699,7 @@ struct Regs {
         ADD_FIELD(trigger_draw_indexed);
         ADD_FIELD(triangle_topology);
         ADD_FIELD(vs_bool_uniforms);
+        ADD_FIELD(vs_int_uniforms);
         ADD_FIELD(vs_main_offset);
         ADD_FIELD(vs_input_register_map);
         ADD_FIELD(vs_uniform_setup);
@@ -668,6 +743,7 @@ private:
 #define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position")
 
 ASSERT_REG_POSITION(trigger_irq, 0x10);
+ASSERT_REG_POSITION(cull_mode, 0x40);
 ASSERT_REG_POSITION(viewport_size_x, 0x41);
 ASSERT_REG_POSITION(viewport_size_y, 0x43);
 ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
@@ -688,6 +764,7 @@ ASSERT_REG_POSITION(tev_stage2, 0xd0);
 ASSERT_REG_POSITION(tev_stage3, 0xd8);
 ASSERT_REG_POSITION(tev_stage4, 0xf0);
 ASSERT_REG_POSITION(tev_stage5, 0xf8);
+ASSERT_REG_POSITION(output_merger, 0x100);
 ASSERT_REG_POSITION(framebuffer, 0x110);
 ASSERT_REG_POSITION(vertex_attributes, 0x200);
 ASSERT_REG_POSITION(index_array, 0x227);
@@ -696,6 +773,7 @@ ASSERT_REG_POSITION(trigger_draw, 0x22e);
 ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
 ASSERT_REG_POSITION(triangle_topology, 0x25e);
 ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0);
+ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1);
 ASSERT_REG_POSITION(vs_main_offset, 0x2ba);
 ASSERT_REG_POSITION(vs_input_register_map, 0x2bb);
 ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0);
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index a80148872..025d4e484 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -18,51 +18,82 @@ namespace Pica {
 namespace Rasterizer {
 
 static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
-    u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
+    const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
+    u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
     u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
 
     // Assuming RGBA8 format until actual framebuffer format handling is implemented
     *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value;
 }
 
+static const Math::Vec4<u8> GetPixel(int x, int y) {
+    const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
+    u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
+    // Read back one pixel using the same 32-bit layout DrawPixel writes: a<<24 | r<<16 | g<<8 | b
+    u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth());
+    Math::Vec4<u8> ret;
+    ret.a() = value >> 24;
+    ret.r() = (value >> 16) & 0xFF;
+    ret.g() = (value >> 8) & 0xFF;
+    ret.b() = value & 0xFF;
+    return ret;
+}
+
 static u32 GetDepth(int x, int y) {
-    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
+    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
+    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
 
     // Assuming 16-bit depth buffer format until actual format handling is implemented
     return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
 }
 
 static void SetDepth(int x, int y, u16 value) {
-    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
+    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
+    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
 
     // Assuming 16-bit depth buffer format until actual format handling is implemented
     *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
 }
 
-void ProcessTriangle(const VertexShader::OutputVertex& v0,
-                     const VertexShader::OutputVertex& v1,
-                     const VertexShader::OutputVertex& v2)
-{
-    // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
-    struct Fix12P4 {
-        Fix12P4() {}
-        Fix12P4(u16 val) : val(val) {}
+// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
+struct Fix12P4 {
+    Fix12P4() {}
+    Fix12P4(u16 val) : val(val) {}
 
-        static u16 FracMask() { return 0xF; }
-        static u16 IntMask() { return (u16)~0xF; }
+    static u16 FracMask() { return 0xF; }
+    static u16 IntMask() { return (u16)~0xF; }
 
-        operator u16() const {
-            return val;
-        }
+    operator u16() const {
+        return val;
+    }
 
-        bool operator < (const Fix12P4& oth) const {
-            return (u16)*this < (u16)oth;
-        }
+    bool operator < (const Fix12P4& oth) const {
+        return (u16)*this < (u16)oth;
+    }
 
-    private:
-        u16 val;
-    };
+private:
+    u16 val;
+};
+
+/**
+ * Calculate twice the signed area of the triangle spanned by the three argument vertices
+ * (the z component of the cross product of the edges vtx1->vtx2 and vtx1->vtx3).
+ * The sign denotes an orientation.
+ * @todo define orientation concretely.
+ */
+static int SignedArea(const Math::Vec2<Fix12P4>& vtx1,
+                      const Math::Vec2<Fix12P4>& vtx2,
+                      const Math::Vec2<Fix12P4>& vtx3) {
+    const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
+    const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
+    // TODO: There is a very small chance this will overflow for sizeof(int) == 4
+    return Math::Cross(vec1, vec2).z;
+}
 
+void ProcessTriangle(const VertexShader::OutputVertex& v0,
+                     const VertexShader::OutputVertex& v1,
+                     const VertexShader::OutputVertex& v2)
+{
     // vertex positions in rasterizer coordinates
     auto FloatToFix = [](float24 flt) {
                           return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f));
@@ -70,10 +101,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
     auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {
                                              return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
                                          };
+
     Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),
                                    ScreenToRasterizerCoordinates(v1.screenpos),
                                    ScreenToRasterizerCoordinates(v2.screenpos) };
 
+    if (registers.cull_mode == Regs::CullMode::KeepClockWise) {
+        // Reverse vertex order and use the CCW code path.
+        std::swap(vtxpos[1], vtxpos[2]);
+    }
+
+    if (registers.cull_mode != Regs::CullMode::KeepAll) {
+        // Cull away triangles which are wound clockwise.
+        // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
+        if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
+            return;
+    }
+
     // TODO: Proper scissor rect test!
     u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
     u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
@@ -116,18 +160,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
         for (u16 x = min_x; x < max_x; x += 0x10) {
 
             // Calculate the barycentric coordinates w0, w1 and w2
-            auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
-                               const Math::Vec2<Fix12P4>& vtx2,
-                               const Math::Vec2<Fix12P4>& vtx3) {
-                const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
-                const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
-                // TODO: There is a very small chance this will overflow for sizeof(int) == 4
-                return Math::Cross(vec1, vec2).z;
-            };
-
-            int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
-            int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
-            int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
+            int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
+            int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
+            int w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
             int wsum = w0 + w1 + w2;
 
             // If current pixel is not covered by the current primitive
@@ -201,8 +236,8 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                             return 0;
                     }
                 };
-                s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width);
-                t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);
+                s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
+                t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
 
                 u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
                 auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
@@ -279,12 +314,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     }
                 };
 
-                auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
+                static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
                     switch (factor)
                     {
                     case ColorModifier::SourceColor:
                         return values.rgb();
 
+                    case ColorModifier::OneMinusSourceColor:
+                        return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
+
                     case ColorModifier::SourceAlpha:
                         return { values.a(), values.a(), values.a() };
 
@@ -295,7 +333,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     }
                 };
 
-                auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
+                static auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
                     switch (factor) {
                     case AlphaModifier::SourceAlpha:
                         return value;
@@ -310,7 +348,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     }
                 };
 
-                auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
+                static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
                     switch (op) {
                     case Operation::Replace:
                         return input[0];
@@ -330,6 +368,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     case Operation::Lerp:
                         return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
 
+                    case Operation::Subtract:
+                    {
+                        auto result = input[0].Cast<int>() - input[1].Cast<int>();
+                        result.r() = std::max(0, result.r());
+                        result.g() = std::max(0, result.g());
+                        result.b() = std::max(0, result.b());
+                        return result.Cast<u8>();
+                    }
+
                     default:
                         LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
                         _dbg_assert_(HW_GPU, 0);
@@ -337,7 +384,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     }
                 };
 
-                auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
+                static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
                     switch (op) {
                     case Operation::Replace:
                         return input[0];
@@ -351,6 +398,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     case Operation::Lerp:
                         return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
 
+                    case Operation::Subtract:
+                        return std::max(0, (int)input[0] - (int)input[1]);
+
                     default:
                         LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
                         _dbg_assert_(HW_GPU, 0);
@@ -381,12 +431,111 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                 combiner_output = Math::MakeVec(color_output, alpha_output);
             }
 
-            // TODO: Not sure if the multiplication by 65535 has already been taken care
-            // of when transforming to screen coordinates or not.
-            u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
-                           (float)v1.screenpos[2].ToFloat32() * w1 +
-                           (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
-            SetDepth(x >> 4, y >> 4, z);
+            // TODO: Does depth indeed only get written if depth testing is enabled?
+            if (registers.output_merger.depth_test_enable) {
+                u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 +
+                            v1.screenpos[2].ToFloat32() * w1 +
+                            v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
+                u16 ref_z = GetDepth(x >> 4, y >> 4);
+
+                bool pass = false;
+
+                switch (registers.output_merger.depth_test_func) {
+                case registers.output_merger.Always:
+                    pass = true;
+                    break;
+
+                case registers.output_merger.LessThan:
+                    pass = z < ref_z;
+                    break;
+
+                case registers.output_merger.GreaterThan:
+                    pass = z > ref_z;
+                    break;
+
+                default:
+                    LOG_ERROR(HW_GPU, "Unknown depth test function %x", registers.output_merger.depth_test_func.Value());
+                    break;
+                }
+
+                if (!pass)
+                    continue;
+
+                if (registers.output_merger.depth_write_enable)
+                    SetDepth(x >> 4, y >> 4, z);
+            }
+
+            auto dest = GetPixel(x >> 4, y >> 4);
+
+            if (registers.output_merger.alphablend_enable) {
+                auto params = registers.output_merger.alpha_blending;
+
+                // Translate a blend factor selector into per-channel RGB weights (0..255)
+                auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> {
+                    switch(factor) {
+                    case params.Zero:
+                        return Math::Vec3<u8>(0, 0, 0);
+
+                    case params.One:
+                        return Math::Vec3<u8>(255, 255, 255);
+
+                    case params.SourceAlpha:
+                        return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a());
+
+                    case params.OneMinusSourceAlpha:
+                        return Math::Vec3<u8>(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a());
+
+                    default:
+                        LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
+                        exit(0);
+                    }
+                };
+
+                auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 {
+                    switch(factor) {
+                    case params.Zero:
+                        return 0;
+
+                    case params.One:
+                        return 255;
+
+                    case params.SourceAlpha:
+                        return combiner_output.a();
+
+                    case params.OneMinusSourceAlpha:
+                        return 255 - combiner_output.a();
+
+                    default:
+                        LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor);
+                        exit(0);
+                    }
+                };
+
+                auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb),
+                                               LookupFactorA(params.factor_source_a));
+                auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb),
+                                               LookupFactorA(params.factor_dest_a));
+
+                switch (params.blend_equation_rgb) { // NOTE: blend_equation_a is not consulted; alpha uses the RGB equation
+                case params.Add:
+                {
+                    auto result = (combiner_output * srcfactor + dest * dstfactor) / 255;
+                    result.r() = std::min(255, result.r());
+                    result.g() = std::min(255, result.g());
+                    result.b() = std::min(255, result.b());
+                    result.a() = std::min(255, result.a()); // alpha can reach 510 too and would wrap in Cast<u8>()
+                    combiner_output = result.Cast<u8>();
+                    break;
+                }
+
+                default:
+                    LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value());
+                    exit(0);
+                }
+            } else {
+                LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op.op.Value());
+                exit(0);
+            }
 
             DrawPixel(x >> 4, y >> 4, combiner_output);
         }
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index 63ebccbde..6fd640425 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -8,32 +8,6 @@
 
 #include "common/common_types.h"
 
-namespace FormatPrecision {
-
-/// Adjust RGBA8 color with RGBA6 precision
-static inline u32 rgba8_with_rgba6(u32 src) {
-    u32 color = src;
-    color &= 0xFCFCFCFC;
-    color |= (color >> 6) & 0x03030303;
-    return color;
-}
-
-/// Adjust RGBA8 color with RGB565 precision
-static inline u32 rgba8_with_rgb565(u32 src) {
-    u32 color = (src & 0xF8FCF8);
-    color |= (color >> 5) & 0x070007;
-    color |= (color >> 6) & 0x000300;
-    color |= 0xFF000000;
-    return color;
-}
-
-/// Adjust Z24 depth value with Z16 precision
-static inline u32 z24_with_z16(u32 src) {
-    return (src & 0xFFFF00) | (src >> 16);
-}
-
-} // namespace
-
 namespace VideoCore {
 
 /// Structure for the TGA texture format (for dumping)
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index bed5081a0..ff825e2e1 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -30,6 +30,8 @@ static struct {
     Math::Vec4<float24> f[96];
 
     std::array<bool,16> b;
+
+    std::array<Math::Vec4<u8>,4> i;
 } shader_uniforms;
 
 // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
@@ -37,33 +39,31 @@ static struct {
 static std::array<u32, 1024> shader_memory;
 static std::array<u32, 1024> swizzle_data;
 
-void SubmitShaderMemoryChange(u32 addr, u32 value)
-{
+void SubmitShaderMemoryChange(u32 addr, u32 value) {
     shader_memory[addr] = value;
 }
 
-void SubmitSwizzleDataChange(u32 addr, u32 value)
-{
+void SubmitSwizzleDataChange(u32 addr, u32 value) {
     swizzle_data[addr] = value;
 }
 
-Math::Vec4<float24>& GetFloatUniform(u32 index)
-{
+Math::Vec4<float24>& GetFloatUniform(u32 index) {
     return shader_uniforms.f[index];
 }
 
-bool& GetBoolUniform(u32 index)
-{
+bool& GetBoolUniform(u32 index) {
     return shader_uniforms.b[index];
 }
 
-const std::array<u32, 1024>& GetShaderBinary()
-{
+Math::Vec4<u8>& GetIntUniform(u32 index) {
+    return shader_uniforms.i[index]; // unchecked access: index must be < 4 (size of shader_uniforms.i)
+}
+
+const std::array<u32, 1024>& GetShaderBinary() {
     return shader_memory;
 }
 
-const std::array<u32, 1024>& GetSwizzlePatterns()
-{
+const std::array<u32, 1024>& GetSwizzlePatterns() {
     return swizzle_data;
 }
 
@@ -437,8 +437,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
     }
 }
 
-OutputVertex RunShader(const InputVertex& input, int num_attributes)
-{
+OutputVertex RunShader(const InputVertex& input, int num_attributes) {
     VertexShaderState state;
 
     const u32* main = &shader_memory[registers.vs_main_offset];
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h
index af3fb2a2f..3a68a3409 100644
--- a/src/video_core/vertex_shader.h
+++ b/src/video_core/vertex_shader.h
@@ -73,6 +73,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes);
 
 Math::Vec4<float24>& GetFloatUniform(u32 index);
 bool& GetBoolUniform(u32 index);
+Math::Vec4<u8>& GetIntUniform(u32 index);
 
 const std::array<u32, 1024>& GetShaderBinary();
 const std::array<u32, 1024>& GetSwizzlePatterns();