diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index 6f3bc6f844..5a8642d1b8 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -24,7 +24,14 @@ int __cdecl main(int argc, char **argv) {
 
     System::Init(emu_window);
 
-    std::string boot_filename = "homebrew.elf";
+    std::string boot_filename;
+
+    if (argc < 2) {
+        ERROR_LOG(BOOT, "Failed to load ROM: No ROM specified");
+    }
+    else {
+        boot_filename = argv[1];
+    }
     std::string error_str;
 
     bool res = Loader::LoadFile(boot_filename, &error_str);
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index 76e0c68c36..9be9829093 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -142,7 +142,7 @@ void GMainWindow::BootGame(const char* filename)
 
 void GMainWindow::OnMenuLoadFile()
 {
-    QString filename = QFileDialog::getOpenFileName(this, tr("Load file"), QString(), tr("3DS homebrew (*.elf *.dat *.bin)"));
+    QString filename = QFileDialog::getOpenFileName(this, tr("Load file"), QString(), tr("3DS homebrew (*.elf *.axf *.dat *.bin)"));
     if (filename.size())
        BootGame(filename.toLatin1().data());
 }
diff --git a/src/common/common.vcxproj b/src/common/common.vcxproj
index 5dc6ff7907..86295a4801 100644
--- a/src/common/common.vcxproj
+++ b/src/common/common.vcxproj
@@ -190,6 +190,7 @@
     
     
     
+    
     
     
     
diff --git a/src/common/common.vcxproj.filters b/src/common/common.vcxproj.filters
index 268730228d..84cfa8837f 100644
--- a/src/common/common.vcxproj.filters
+++ b/src/common/common.vcxproj.filters
@@ -40,6 +40,7 @@
     
     
     
+    
   
   
     
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index f8d10eb3e5..dca4dc47fb 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -22,6 +22,11 @@ template<> struct CompileTimeAssert {};
 #define b32(x)  (b16(x) | (b16(x) >>16) )
 #define ROUND_UP_POW2(x)    (b32(x - 1) + 1)
 
+#define MIN(a, b)   ((a)<(b)?(a):(b))
+#define MAX(a, b)   ((a)>(b)?(a):(b))
+
+#define CLAMP(x, min, max)  (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)))
+
 #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
 
 #ifndef _WIN32
diff --git a/src/common/log.h b/src/common/log.h
index d95f51f564..8b39b03a17 100644
--- a/src/common/log.h
+++ b/src/common/log.h
@@ -5,6 +5,8 @@
 #ifndef _LOG_H_
 #define _LOG_H_
 
+#define LOGGING
+
 #define    NOTICE_LEVEL  1  // VERY important information that is NOT errors. Like startup and OSReports.
 #define    ERROR_LEVEL   2  // Critical errors 
 #define    WARNING_LEVEL 3  // Something is suspicious.
@@ -53,7 +55,7 @@ enum LOG_TYPE {
     WII_IPC_ES,
     WII_IPC_FILEIO,
     WII_IPC_HID,
-    WII_IPC_HLE,
+    KERNEL,
     SVC,
     NDMA,
     HLE,
diff --git a/src/common/log_manager.cpp b/src/common/log_manager.cpp
index 80fd473b93..146472888d 100644
--- a/src/common/log_manager.cpp
+++ b/src/common/log_manager.cpp
@@ -60,13 +60,13 @@ LogManager::LogManager()
     m_Log[LogTypes::LOADER]             = new LogContainer("Loader",            "Loader");
     m_Log[LogTypes::FILESYS]            = new LogContainer("FileSys",           "File System");
     m_Log[LogTypes::WII_IPC_HID]        = new LogContainer("WII_IPC_HID",       "WII IPC HID");
-    m_Log[LogTypes::WII_IPC_HLE]        = new LogContainer("WII_IPC_HLE",       "WII IPC HLE");
+    m_Log[LogTypes::KERNEL]             = new LogContainer("KERNEL",            "KERNEL HLE");
     m_Log[LogTypes::WII_IPC_DVD]        = new LogContainer("WII_IPC_DVD",       "WII IPC DVD");
     m_Log[LogTypes::WII_IPC_ES]         = new LogContainer("WII_IPC_ES",        "WII IPC ES");
     m_Log[LogTypes::WII_IPC_FILEIO]     = new LogContainer("WII_IPC_FILEIO",    "WII IPC FILEIO");
     m_Log[LogTypes::RENDER]             = new LogContainer("RENDER",            "RENDER");
     m_Log[LogTypes::LCD]                = new LogContainer("LCD",               "LCD");
-    m_Log[LogTypes::SVC]                = new LogContainer("SVC",               "Supervisor Call");
+    m_Log[LogTypes::SVC]                = new LogContainer("SVC",               "Supervisor Call HLE");
     m_Log[LogTypes::NDMA]               = new LogContainer("NDMA",              "NDMA");
     m_Log[LogTypes::HLE]                = new LogContainer("HLE",               "High Level Emulation");
     m_Log[LogTypes::HW]                 = new LogContainer("HW",                "Hardware");
diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
new file mode 100644
index 0000000000..4a89572f60
--- /dev/null
+++ b/src/common/thread_queue_list.h
@@ -0,0 +1,216 @@
+// Copyright 2014 Citra Emulator Project / PPSSPP Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#pragma once
+
+#include "common/common.h"
+
+namespace Common {
+
+template
+struct ThreadQueueList {
+    // Number of queues (number of priority levels starting at 0.)
+    static const int NUM_QUEUES = 128;
+    
+    // Initial number of threads a single queue can handle.
+    static const int INITIAL_CAPACITY = 32;
+
+    struct Queue {
+        // Next ever-been-used queue (worse priority.)
+        Queue *next;
+        // First valid item in data.
+        int first;
+        // One after last valid item in data.
+        int end;
+        // A too-large array with room on the front and end.
+        IdType *data;
+        // Size of data array.
+        int capacity;
+    };
+
+    ThreadQueueList() {
+        memset(queues, 0, sizeof(queues));
+        first = invalid();
+    }
+
+    ~ThreadQueueList() {
+        for (int i = 0; i < NUM_QUEUES; ++i)
+        {
+            if (queues[i].data != NULL)
+                free(queues[i].data);
+        }
+    }
+
+    // Only for debugging, returns priority level.
+    int contains(const IdType uid) {
+        for (int i = 0; i < NUM_QUEUES; ++i)
+        {
+            if (queues[i].data == NULL)
+                continue;
+
+            Queue *cur = &queues[i];
+            for (int j = cur->first; j < cur->end; ++j)
+            {
+                if (cur->data[j] == uid)
+                    return i;
+            }
+        }
+
+        return -1;
+    }
+
+    inline IdType pop_first() {
+        Queue *cur = first;
+        while (cur != invalid())
+        {
+            if (cur->end - cur->first > 0)
+                return cur->data[cur->first++];
+            cur = cur->next;
+        }
+
+        //_dbg_assert_msg_(SCEKERNEL, false, "ThreadQueueList should not be empty.");
+        return 0;
+    }
+
+    inline IdType pop_first_better(u32 priority) { // pops the front id of the first non-empty queue linked before queues[priority]; returns 0 if none
+        Queue *cur = first;
+        Queue *stop = &queues[priority];
+        while (cur < stop) // NOTE(review): relies on address order within queues[]; presumably invalid() ((Queue *) -1) compares above stop and ends the walk — confirm
+        {
+            if (cur->end - cur->first > 0)
+                return cur->data[cur->first++];
+            cur = cur->next;
+        }
+
+        return 0; // no non-empty queue was found before stop
+    }
+
+    inline void push_front(u32 priority, const IdType threadID) {
+        Queue *cur = &queues[priority];
+        cur->data[--cur->first] = threadID;
+        if (cur->first == 0)
+            rebalance(priority);
+    }
+
+    inline void push_back(u32 priority, const IdType threadID) {
+        Queue *cur = &queues[priority];
+        cur->data[cur->end++] = threadID;
+        if (cur->end == cur->capacity)
+            rebalance(priority);
+    }
+
+    inline void remove(u32 priority, const IdType threadID) {
+        Queue *cur = &queues[priority];
+        //_dbg_assert_msg_(SCEKERNEL, cur->next != NULL, "ThreadQueueList::Queue should already be linked up.");
+
+        for (int i = cur->first; i < cur->end; ++i)
+        {
+            if (cur->data[i] == threadID)
+            {
+                int remaining = --cur->end - i;
+                if (remaining > 0)
+                    memmove(&cur->data[i], &cur->data[i + 1], remaining * sizeof(IdType));
+                return;
+            }
+        }
+
+        // Wasn't there.
+    }
+
+    inline void rotate(u32 priority) {
+        Queue *cur = &queues[priority];
+        //_dbg_assert_msg_(SCEKERNEL, cur->next != NULL, "ThreadQueueList::Queue should already be linked up.");
+
+        if (cur->end - cur->first > 1)
+        {
+            cur->data[cur->end++] = cur->data[cur->first++];
+            if (cur->end == cur->capacity)
+                rebalance(priority);
+        }
+    }
+
+    inline void clear() {
+        for (int i = 0; i < NUM_QUEUES; ++i)
+        {
+            if (queues[i].data != NULL)
+                free(queues[i].data);
+        }
+        memset(queues, 0, sizeof(queues));
+        first = invalid();
+    }
+
+    inline bool empty(u32 priority) const {
+        const Queue *cur = &queues[priority];
+        return cur->first == cur->end;
+    }
+
+    inline void prepare(u32 priority) {
+        Queue *cur = &queues[priority];
+        if (cur->next == NULL)
+            link(priority, INITIAL_CAPACITY);
+    }
+
+private:
+    Queue *invalid() const {
+        return (Queue *) -1;
+    }
+
+    void link(u32 priority, int size) {
+        //_dbg_assert_msg_(SCEKERNEL, queues[priority].data == NULL, "ThreadQueueList::Queue should only be initialized once.");
+
+        if (size <= INITIAL_CAPACITY)
+            size = INITIAL_CAPACITY;
+        else
+        {
+            int goal = size;
+            size = INITIAL_CAPACITY;
+            while (size < goal)
+                size *= 2;
+        }
+        Queue *cur = &queues[priority];
+        cur->data = (IdType *) malloc(sizeof(IdType) * size);
+        cur->capacity = size;
+        cur->first = size / 2;
+        cur->end = size / 2;
+
+        for (int i = (int) priority - 1; i >= 0; --i)
+        {
+            if (queues[i].next != NULL)
+            {
+                cur->next = queues[i].next;
+                queues[i].next = cur;
+                return;
+            }
+        }
+
+        cur->next = first;
+        first = cur;
+    }
+
+    void rebalance(u32 priority) {
+        Queue *cur = &queues[priority];
+        int size = cur->end - cur->first;
+        if (size >= cur->capacity - 2)  {
+            IdType *new_data = (IdType *)realloc(cur->data, cur->capacity * 2 * sizeof(IdType));
+            if (new_data != NULL)  {
+                cur->capacity *= 2;
+                cur->data = new_data;
+            }
+        }
+
+        int newFirst = (cur->capacity - size) / 2;
+        if (newFirst != cur->first) {
+            memmove(&cur->data[newFirst], &cur->data[cur->first], size * sizeof(IdType));
+            cur->first = newFirst;
+            cur->end = newFirst + size;
+        }
+    }
+
+    // The first queue that's ever been used.
+    Queue *first;
+    // The priority level queues of thread ids.
+    Queue queues[NUM_QUEUES];
+};
+
+} // namespace
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 14c598bf3d..4086b415bf 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -33,7 +33,10 @@ set(SRCS    core.cpp
             hle/hle.cpp
             hle/config_mem.cpp
             hle/coprocessor.cpp
-            hle/syscall.cpp
+            hle/svc.cpp
+            hle/kernel/kernel.cpp
+            hle/kernel/mutex.cpp
+            hle/kernel/thread.cpp
             hle/service/apt.cpp
             hle/service/gsp.cpp
             hle/service/hid.cpp
@@ -75,7 +78,10 @@ set(HEADERS core.h
             hle/config_mem.h
             hle/coprocessor.h
             hle/hle.h
-            hle/syscall.h
+            hle/svc.h
+            hle/kernel/kernel.h
+            hle/kernel/mutex.h
+            hle/kernel/thread.h
             hle/function_wrappers.h
             hle/service/apt.h
             hle/service/gsp.h
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 9fdc7ba3c9..b73786ccd4 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -7,11 +7,13 @@
 #include "common/common.h"
 #include "common/common_types.h"
 
+#include "core/hle/svc.h"
+
 /// Generic ARM11 CPU interface
 class ARM_Interface : NonCopyable {
 public:
     ARM_Interface() {
-        m_num_instructions = 0;
+        num_instructions = 0;
     }
 
     ~ARM_Interface() {
@@ -23,7 +25,7 @@ public:
      */
     void Run(int num_instructions) {
         ExecuteInstructions(num_instructions);
-        m_num_instructions += num_instructions;
+        this->num_instructions += num_instructions; // parameter shadows the member counter, so qualify it
     }
 
     /// Step CPU by one instruction
@@ -63,15 +65,33 @@ public:
      */
     virtual u32 GetCPSR() const = 0;  
 
+    /**
+     * Set the current CPSR register
+     * @param cpsr Value to set CPSR to
+     */
+    virtual void SetCPSR(u32 cpsr) = 0;
+
     /**
      * Returns the number of clock ticks since the last rese
      * @return Returns number of clock ticks
      */
     virtual u64 GetTicks() const = 0;
 
-    /// Getter for m_num_instructions
+    /**
+     * Saves the current CPU context
+     * @param ctx Thread context to save
+     */
+    virtual void SaveContext(ThreadContext& ctx) = 0;
+
+    /**
+     * Loads a CPU context
+     * @param ctx Thread context to load
+     */
+    virtual void LoadContext(const ThreadContext& ctx) = 0;
+
+    /// Getter for num_instructions
     u64 GetNumInstructions() {
-        return m_num_instructions;
+        return num_instructions;
     }
 
 protected:
@@ -84,6 +104,6 @@ protected:
 
 private:
 
-    u64 m_num_instructions;                     ///< Number of instructions executed
+    u64 num_instructions; ///< Number of instructions executed
 
 };
diff --git a/src/core/arm/interpreter/arm_interpreter.cpp b/src/core/arm/interpreter/arm_interpreter.cpp
index 23d96d292b..17f787b862 100644
--- a/src/core/arm/interpreter/arm_interpreter.cpp
+++ b/src/core/arm/interpreter/arm_interpreter.cpp
@@ -9,30 +9,30 @@ const static cpu_config_t s_arm11_cpu_info = {
 };
 
 ARM_Interpreter::ARM_Interpreter()  {
-    m_state = new ARMul_State;
+    state = new ARMul_State;
 
     ARMul_EmulateInit();
-    ARMul_NewState(m_state);
+    ARMul_NewState(state);
 
-    m_state->abort_model = 0;
-    m_state->cpu = (cpu_config_t*)&s_arm11_cpu_info;
-    m_state->bigendSig = LOW;
+    state->abort_model = 0;
+    state->cpu = (cpu_config_t*)&s_arm11_cpu_info;
+    state->bigendSig = LOW;
 
-    ARMul_SelectProcessor(m_state, ARM_v6_Prop | ARM_v5_Prop | ARM_v5e_Prop);
-    m_state->lateabtSig = LOW;
-    mmu_init(m_state);
+    ARMul_SelectProcessor(state, ARM_v6_Prop | ARM_v5_Prop | ARM_v5e_Prop);
+    state->lateabtSig = LOW;
+    mmu_init(state);
 
     // Reset the core to initial state
-    ARMul_Reset(m_state);
-    m_state->NextInstr = 0;
-    m_state->Emulate = 3;
+    ARMul_Reset(state);
+    state->NextInstr = 0;
+    state->Emulate = 3;
 
-    m_state->pc = m_state->Reg[15] = 0x00000000;
-    m_state->Reg[13] = 0x10000000; // Set stack pointer to the top of the stack
+    state->pc = state->Reg[15] = 0x00000000;
+    state->Reg[13] = 0x10000000; // Set stack pointer to the top of the stack
 }
 
 ARM_Interpreter::~ARM_Interpreter() {
-    delete m_state;
+    delete state;
 }
 
 /**
@@ -40,7 +40,7 @@ ARM_Interpreter::~ARM_Interpreter() {
  * @param addr Address to set PC to
  */
 void ARM_Interpreter::SetPC(u32 pc) {
-    m_state->pc = m_state->Reg[15] = pc;
+    state->pc = state->Reg[15] = pc;
 }
 
 /*
@@ -48,7 +48,7 @@ void ARM_Interpreter::SetPC(u32 pc) {
  * @return Returns current PC
  */
 u32 ARM_Interpreter::GetPC() const {
-    return m_state->pc;
+    return state->pc;
 }
 
 /**
@@ -57,7 +57,7 @@ u32 ARM_Interpreter::GetPC() const {
  * @return Returns the value in the register
  */
 u32 ARM_Interpreter::GetReg(int index) const {
-    return m_state->Reg[index];
+    return state->Reg[index];
 }
 
 /**
@@ -66,7 +66,7 @@ u32 ARM_Interpreter::GetReg(int index) const {
  * @param value Value to set register to
  */
 void ARM_Interpreter::SetReg(int index, u32 value) {
-    m_state->Reg[index] = value;
+    state->Reg[index] = value;
 }
 
 /**
@@ -74,7 +74,15 @@ void ARM_Interpreter::SetReg(int index, u32 value) {
  * @return Returns the value of the CPSR register
  */
 u32 ARM_Interpreter::GetCPSR() const {
-    return m_state->Cpsr;
+    return state->Cpsr;
+}
+
+/**
+ * Set the current CPSR register
+ * @param cpsr Value to set CPSR to
+ */
+void ARM_Interpreter::SetCPSR(u32 cpsr) {
+    state->Cpsr = cpsr;
 }
 
 /**
@@ -82,7 +90,7 @@ u32 ARM_Interpreter::GetCPSR() const {
  * @return Returns number of clock ticks
  */
 u64 ARM_Interpreter::GetTicks() const {
-    return ARMul_Time(m_state);
+    return ARMul_Time(state);
 }
 
 /**
@@ -90,6 +98,45 @@ u64 ARM_Interpreter::GetTicks() const {
  * @param num_instructions Number of instructions to executes
  */
 void ARM_Interpreter::ExecuteInstructions(int num_instructions) {
-    m_state->NumInstrsToExecute = num_instructions;
-    ARMul_Emulate32(m_state);
+    state->NumInstrsToExecute = num_instructions;
+    ARMul_Emulate32(state);
+}
+
+/**
+ * Saves the current CPU context
+ * @param ctx Thread context to save
+ * @todo Do we need to save Reg[15] and NextInstr?
+ */
+void ARM_Interpreter::SaveContext(ThreadContext& ctx) {
+    memcpy(ctx.cpu_registers, state->Reg, sizeof(ctx.cpu_registers));
+    memcpy(ctx.fpu_registers, state->ExtReg, sizeof(ctx.fpu_registers));
+
+    ctx.sp = state->Reg[13];
+    ctx.lr = state->Reg[14];
+    ctx.pc = state->pc;
+    ctx.cpsr = state->Cpsr;
+
+    ctx.fpscr = state->VFP[1];
+    ctx.fpexc = state->VFP[2];
+}
+
+/**
+ * Loads a CPU context
+ * @param ctx Thread context to load
+ * @todo Do we need to load Reg[15] and NextInstr?
+ */
+void ARM_Interpreter::LoadContext(const ThreadContext& ctx) {
+    memcpy(state->Reg, ctx.cpu_registers, sizeof(ctx.cpu_registers));
+    memcpy(state->ExtReg, ctx.fpu_registers, sizeof(ctx.fpu_registers));
+
+    state->Reg[13] = ctx.sp;
+    state->Reg[14] = ctx.lr;
+    state->pc = ctx.pc;
+    state->Cpsr = ctx.cpsr;
+
+    state->VFP[1] = ctx.fpscr;
+    state->VFP[2] = ctx.fpexc;
+
+    state->Reg[15] = ctx.pc;
+    state->NextInstr = RESUME;
 }
diff --git a/src/core/arm/interpreter/arm_interpreter.h b/src/core/arm/interpreter/arm_interpreter.h
index 509025080b..6a531e4979 100644
--- a/src/core/arm/interpreter/arm_interpreter.h
+++ b/src/core/arm/interpreter/arm_interpreter.h
@@ -48,12 +48,30 @@ public:
      */
     u32 GetCPSR() const;
 
+    /**
+     * Set the current CPSR register
+     * @param cpsr Value to set CPSR to
+     */
+    void SetCPSR(u32 cpsr);
+
     /**
      * Returns the number of clock ticks since the last reset
      * @return Returns number of clock ticks
      */
     u64 GetTicks() const;
 
+    /**
+     * Saves the current CPU context
+     * @param ctx Thread context to save
+     */
+    void SaveContext(ThreadContext& ctx);
+
+    /**
+     * Loads a CPU context
+     * @param ctx Thread context to load
+     */
+    void LoadContext(const ThreadContext& ctx);
+
 protected:
 
     /**
@@ -64,6 +82,6 @@ protected:
 
 private:
 
-    ARMul_State* m_state;
+    ARMul_State* state;
 
 };
diff --git a/src/core/arm/interpreter/armdefs.h b/src/core/arm/interpreter/armdefs.h
index 5b2abc7f74..d8eae4d3f1 100644
--- a/src/core/arm/interpreter/armdefs.h
+++ b/src/core/arm/interpreter/armdefs.h
@@ -24,10 +24,6 @@
 
 #include "common/platform.h"
 
-#if EMU_PLATFORM == PLATFORM_WINDOWS
-#include 
-#endif
-
 //teawater add for arm2x86 2005.02.14-------------------------------------------
 // koodailar remove it for mingw 2005.12.18----------------
 //anthonylee modify it for portable 2007.01.30
diff --git a/src/core/arm/interpreter/armemu.cpp b/src/core/arm/interpreter/armemu.cpp
index 32e315f4b8..e5dc7bd44f 100644
--- a/src/core/arm/interpreter/armemu.cpp
+++ b/src/core/arm/interpreter/armemu.cpp
@@ -4478,8 +4478,7 @@ ARMul_Emulate26 (ARMul_State * state)
                                  isize) &
                                 R15PCBITS));
 #endif
-                    }
-                    else
+                    } else if (instr != 0xDEADC0DE) // thumbemu uses 0xDEADCODE for debugging to catch non updates 
                         ARMul_MCR (state, instr,
                                DEST);
                 }
@@ -4549,7 +4548,7 @@ ARMul_Emulate26 (ARMul_State * state)
                 //    ARMul_OSHandleSWI (state, BITS (0, 23));
                 //    break;
                 //}
-                HLE::CallSyscall(instr);
+                HLE::CallSVC(instr);
                 ARMul_Abort (state, ARMul_SWIV);
                 break;
             }
diff --git a/src/core/arm/interpreter/arminit.cpp b/src/core/arm/interpreter/arminit.cpp
index 2c771cdda5..e05667beac 100644
--- a/src/core/arm/interpreter/arminit.cpp
+++ b/src/core/arm/interpreter/arminit.cpp
@@ -17,8 +17,11 @@
 
 
 #include "common/platform.h"
+
 #if EMU_PLATFORM == PLATFORM_LINUX
 #include 
+#elif EMU_PLATFORM == PLATFORM_WINDOWS
+#include 
 #endif
 
 #include 
diff --git a/src/core/arm/interpreter/vfp/vfp.h b/src/core/arm/interpreter/vfp/vfp.h
index f738a615b1..bbf4caeb02 100644
--- a/src/core/arm/interpreter/vfp/vfp.h
+++ b/src/core/arm/interpreter/vfp/vfp.h
@@ -21,7 +21,7 @@
 #ifndef __VFP_H__
 #define __VFP_H__
 
-#define DBG(...) DEBUG_LOG(ARM11, __VA_ARGS__)
+#define DBG(...) //DEBUG_LOG(ARM11, __VA_ARGS__)
 
 #define vfpdebug //printf
 
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 61c237b2c3..f88bcd704e 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -12,6 +12,8 @@
 #include "core/arm/disassembler/arm_disasm.h"
 #include "core/arm/interpreter/arm_interpreter.h"
 
+#include "core/hle/kernel/thread.h"
+
 namespace Core {
 
 ARM_Disasm*     g_disasm    = NULL; ///< ARM disassembler
@@ -21,14 +23,17 @@ ARM_Interface*  g_sys_core  = NULL; ///< ARM11 system (OS) core
 /// Run the core CPU loop
 void RunLoop() {
     for (;;){
-        g_app_core->Run(10000);
+        g_app_core->Run(100);
         HW::Update();
+        Kernel::Reschedule();
     }
 }
 
 /// Step the CPU one instruction
 void SingleStep() {
     g_app_core->Step();
+    HW::Update();
+    Kernel::Reschedule();
 }
 
 /// Halt the core
diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj
index 41af5801d8..f271d336e0 100644
--- a/src/core/core.vcxproj
+++ b/src/core/core.vcxproj
@@ -168,12 +168,15 @@
     
     
     
+    
+    
+    
     
     
     
     
     
-    
+    
     
     
     
@@ -214,12 +217,15 @@
     
     
     
+    
+    
+    
     
     
     
     
     
-    
+    
     
     
     
diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters
index edf34ce2fc..b6c1d5b935 100644
--- a/src/core/core.vcxproj.filters
+++ b/src/core/core.vcxproj.filters
@@ -31,6 +31,9 @@
     
       {13ef9860-2ba0-47e9-a93d-b4052adab269}
     
+    
+      {8089d94b-5faa-43dc-854b-ffd2fa2e7fe3}
+    
   
   
     
@@ -81,9 +84,6 @@
     
       hle
     
-    
-      hle
-    
     
       hle\service
     
@@ -147,12 +147,24 @@
     
       arm\interpreter\mmu
     
-    
-      arm
-    
     
       arm\interpreter\mmu
     
+    
+      hle\kernel
+    
+    
+      hle\kernel
+    
+    
+      hle
+    
+    
+      hle\kernel
+    
+    
+      arm\interpreter
+    
   
   
     
@@ -217,9 +229,6 @@
     
       hle\service
     
-    
-      hle
-    
     
       hle\service
     
@@ -274,6 +283,18 @@
     
       arm\interpreter\mmu
     
+    
+      hle\kernel
+    
+    
+      hle\kernel
+    
+    
+      hle
+    
+    
+      hle\kernel
+    
   
   
     
diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h
index d934eafb4a..801865d493 100644
--- a/src/core/hle/function_wrappers.h
+++ b/src/core/hle/function_wrappers.h
@@ -719,17 +719,27 @@ template void WrapI_VU(){
     RETURN(retval);
 }
 
+template void WrapI_VVU(){
+    u32 retval = func(Memory::GetPointer(PARAM(0)), Memory::GetPointer(PARAM(1)), PARAM(2));
+    RETURN(retval);
+}
+
 template void WrapI_VUVI(){
     u32 retval = func(Memory::GetPointer(PARAM(0)), PARAM(1), Memory::GetPointer(PARAM(2)), PARAM(3));
     RETURN(retval);
 }
 
 template void WrapI_VUUUUU(){
-    u32 retval = func(Memory::GetPointer(PARAM(0)), PARAM(1), PARAM(2), PARAM(3), PARAM(4), PARAM(5));
+    u32 retval = func(NULL, PARAM(0), PARAM(1), PARAM(2), PARAM(3), PARAM(4));
     RETURN(retval);
 }
 
 template void WrapI_US64() {
-    int retval = func(PARAM(0), PARAM64(2));
+    int retval = func(PARAM(0), PARAM64(1));
+    RETURN(retval);
+}
+
+template void WrapI_VVUUS64() {
+    int retval = func(Memory::GetPointer(PARAM(0)), Memory::GetPointer(PARAM(1)), PARAM(2), PARAM(3), PARAM(4));
     RETURN(retval);
 }
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp
index be151665b2..080c36abf4 100644
--- a/src/core/hle/hle.cpp
+++ b/src/core/hle/hle.cpp
@@ -6,7 +6,7 @@
 
 #include "core/mem_map.h"
 #include "core/hle/hle.h"
-#include "core/hle/syscall.h"
+#include "core/hle/svc.h"
 #include "core/hle/service/service.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -15,17 +15,17 @@ namespace HLE {
 
 static std::vector g_module_db;
 
-const FunctionDef* GetSyscallInfo(u32 opcode) {
+const FunctionDef* GetSVCInfo(u32 opcode) {
     u32 func_num = opcode & 0xFFFFFF; // 8 bits
     if (func_num > 0xFF) {
-        ERROR_LOG(HLE,"Unknown syscall: 0x%02X", func_num); 
+        ERROR_LOG(HLE,"Unknown SVC: 0x%02X", func_num); 
         return NULL;
     }
     return &g_module_db[0].func_table[func_num];
 }
 
-void CallSyscall(u32 opcode) {
-    const FunctionDef *info = GetSyscallInfo(opcode);
+void CallSVC(u32 opcode) {
+    const FunctionDef *info = GetSVCInfo(opcode);
 
     if (!info) {
         return;
@@ -33,17 +33,28 @@ void CallSyscall(u32 opcode) {
     if (info->func) {
         info->func();
     } else {
-        ERROR_LOG(HLE, "Unimplemented SysCall function %s(..)", info->name.c_str());
+        ERROR_LOG(HLE, "Unimplemented SVC function %s(..)", info->name.c_str());
     }
 }
 
+void EatCycles(u32 cycles) {
+    // TODO: ImplementMe
+}
+
+void ReSchedule(const char *reason) {
+#ifdef _DEBUG
+    _dbg_assert_msg_(HLE, reason != 0 && strlen(reason) < 256, "ReSchedule: Invalid or too long reason.");
+#endif
+    // TODO: ImplementMe
+}
+
 void RegisterModule(std::string name, int num_functions, const FunctionDef* func_table) {
     ModuleDef module = {name, num_functions, func_table};
     g_module_db.push_back(module);
 }
 
 void RegisterAllModules() {
-    Syscall::Register();
+    SVC::Register();
 }
 
 void Init() {
diff --git a/src/core/hle/hle.h b/src/core/hle/hle.h
index 42f37e29cf..c075147c39 100644
--- a/src/core/hle/hle.h
+++ b/src/core/hle/hle.h
@@ -34,7 +34,11 @@ struct ModuleDef {
 
 void RegisterModule(std::string name, int num_functions, const FunctionDef *func_table);
 
-void CallSyscall(u32 opcode);
+void CallSVC(u32 opcode);
+
+void EatCycles(u32 cycles);
+
+void ReSchedule(const char *reason);
 
 void Init();
 
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
new file mode 100644
index 0000000000..de80de8937
--- /dev/null
+++ b/src/core/hle/kernel/kernel.cpp
@@ -0,0 +1,158 @@
+// Copyright 2014 Citra Emulator Project / PPSSPP Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#pragma once
+
+#include 
+
+#include "common/common.h"
+
+#include "core/core.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/thread.h"
+
+namespace Kernel {
+
+ObjectPool g_object_pool;
+
+ObjectPool::ObjectPool() {
+    memset(occupied, 0, sizeof(bool) * MAX_COUNT);
+    next_id = INITIAL_NEXT_ID;
+}
+
+Handle ObjectPool::Create(Object* obj, int range_bottom, int range_top) {
+    if (range_top > MAX_COUNT) {
+        range_top = MAX_COUNT;
+    }
+    if (next_id >= range_bottom && next_id < range_top) {
+        range_bottom = next_id++;
+    }
+    for (int i = range_bottom; i < range_top; i++) {
+        if (!occupied[i]) {
+            occupied[i] = true;
+            pool[i] = obj;
+            pool[i]->handle = i + HANDLE_OFFSET;
+            return i + HANDLE_OFFSET;
+        }
+    }
+    ERROR_LOG(HLE, "Unable to allocate kernel object, too many objects slots in use.");
+    return 0;
+}
+
+bool ObjectPool::IsValid(Handle handle) {
+    int index = handle - HANDLE_OFFSET;
+    if (index < 0)
+        return false;
+    if (index >= MAX_COUNT)
+        return false;
+
+    return occupied[index];
+}
+
+void ObjectPool::Clear() {
+    for (int i = 0; i < MAX_COUNT; i++) {
+        //brutally clear everything, no validation
+        if (occupied[i])
+            delete pool[i];
+        occupied[i] = false;
+    }
+    memset(pool, 0, sizeof(Object*)*MAX_COUNT);
+    next_id = INITIAL_NEXT_ID;
+}
+
+Object* &ObjectPool::operator [](Handle handle)
+{
+    _dbg_assert_msg_(KERNEL, IsValid(handle), "GRABBING UNALLOCED KERNEL OBJ");
+    return pool[handle - HANDLE_OFFSET];
+}
+
+void ObjectPool::List() {
+    for (int i = 0; i < MAX_COUNT; i++) {
+        if (occupied[i]) {
+            if (pool[i]) {
+                INFO_LOG(KERNEL, "KO %i: %s \"%s\"", i + HANDLE_OFFSET, pool[i]->GetTypeName(), 
+                    pool[i]->GetName());
+            }
+        }
+    }
+}
+
+int ObjectPool::GetCount() {
+    int count = 0;
+    for (int i = 0; i < MAX_COUNT; i++) {
+        if (occupied[i])
+            count++;
+    }
+    return count;
+}
+
+Object* ObjectPool::CreateByIDType(int type) {
+    // Used for save states.  This is ugly, but what other way is there?
+    switch (type) {
+    //case SCE_KERNEL_TMID_Alarm:
+    //    return __KernelAlarmObject();
+    //case SCE_KERNEL_TMID_EventFlag:
+    //    return __KernelEventFlagObject();
+    //case SCE_KERNEL_TMID_Mbox:
+    //    return __KernelMbxObject();
+    //case SCE_KERNEL_TMID_Fpl:
+    //    return __KernelMemoryFPLObject();
+    //case SCE_KERNEL_TMID_Vpl:
+    //    return __KernelMemoryVPLObject();
+    //case PPSSPP_KERNEL_TMID_PMB:
+    //    return __KernelMemoryPMBObject();
+    //case PPSSPP_KERNEL_TMID_Module:
+    //    return __KernelModuleObject();
+    //case SCE_KERNEL_TMID_Mpipe:
+    //    return __KernelMsgPipeObject();
+    //case SCE_KERNEL_TMID_Mutex:
+    //    return __KernelMutexObject();
+    //case SCE_KERNEL_TMID_LwMutex:
+    //    return __KernelLwMutexObject();
+    //case SCE_KERNEL_TMID_Semaphore:
+    //    return __KernelSemaphoreObject();
+    //case SCE_KERNEL_TMID_Callback:
+    //    return __KernelCallbackObject();
+    //case SCE_KERNEL_TMID_Thread:
+    //    return __KernelThreadObject();
+    //case SCE_KERNEL_TMID_VTimer:
+    //    return __KernelVTimerObject();
+    //case SCE_KERNEL_TMID_Tlspl:
+    //    return __KernelTlsplObject();
+    //case PPSSPP_KERNEL_TMID_File:
+    //    return __KernelFileNodeObject();
+    //case PPSSPP_KERNEL_TMID_DirList:
+    //    return __KernelDirListingObject();
+
+    default:
+        ERROR_LOG(COMMON, "Unable to load state: could not find object type %d.", type);
+        return NULL;
+    }
+}
+
+void Init() {
+    Kernel::ThreadingInit();
+}
+
+void Shutdown() {
+    Kernel::ThreadingShutdown();
+}
+
+/**
+ * Loads executable stored at specified address
+ * @param entry_point Entry point in memory of loaded executable
+ * @return True on success, otherwise false (currently always returns true)
+ */
+bool LoadExec(u32 entry_point) {
+    Init();
+
+    Core::g_app_core->SetPC(entry_point);
+
+    // 0x30 is the typical main thread priority I've seen used so far.
+    // The returned thread handle is not needed here, so it is deliberately discarded.
+    Kernel::SetupMainThread(0x30);
+    return true;
+}
+
+} // namespace
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
new file mode 100644
index 0000000000..7cd79c2c41
--- /dev/null
+++ b/src/core/hle/kernel/kernel.h
@@ -0,0 +1,154 @@
+// Copyright 2014 Citra Emulator Project / PPSSPP Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#pragma once
+
+#include "common/common.h"
+
+using Handle = u32; // Identifier for a kernel object, as returned by ObjectPool::Create()
+using Result = s32; // Signed 32-bit result value; not referenced elsewhere in this header
+
+namespace Kernel {
+
+enum class HandleType : u32 { // Kind tag for kernel objects; each Object reports one via GetHandleType()
+    Unknown         = 0,
+    Port            = 1,
+    Service         = 2,
+    Event           = 3,
+    Mutex           = 4,
+    SharedMemory    = 5,
+    Redirection     = 6,
+    Thread          = 7,
+    Process         = 8,
+    Arbiter         = 9,
+    File            = 10,
+    Semaphore       = 11,
+};
+    
+enum { // Kernel-wide constants; neither one is referenced in this header
+    MAX_NAME_LENGTH     = 0x100,  // NOTE(review): presumably a character count for object names - confirm
+    DEFAULT_STACK_SIZE  = 0x4000, // NOTE(review): presumably bytes - confirm against thread setup code
+};
+
+class ObjectPool;
+
+class Object : NonCopyable { // Base of everything held by ObjectPool (a friend, presumably to set `handle`)
+    friend class ObjectPool;
+    Handle handle = 0; // 0 is never valid: ObjectPool rejects every handle below its HANDLE_OFFSET
+public:
+    virtual ~Object() {}
+    Handle GetHandle() const { return handle; } // 0 until some other code assigns a handle
+    virtual const char *GetTypeName() { return "[BAD KERNEL OBJECT TYPE]"; } // placeholder default
+    virtual const char *GetName() { return "[UNKNOWN KERNEL OBJECT]"; }      // placeholder default
+    virtual Kernel::HandleType GetHandleType() const = 0; // compared to T::GetStaticHandleType() by the pool
+};
+
+// Fixed-capacity table that owns kernel Objects and maps Handles to them. A handle is the slot
+// index plus HANDLE_OFFSET, so valid handles lie in [HANDLE_OFFSET, HANDLE_OFFSET + MAX_COUNT).
+class ObjectPool : NonCopyable {
+public:
+    ObjectPool();
+    ~ObjectPool() {}
+
+    // Allocates a handle within the range and inserts the object into the map.
+    Handle Create(Object* obj, int range_bottom=INITIAL_NEXT_ID, int range_top=0x7FFFFFFF);
+
+    static Object* CreateByIDType(int type);
+
+    // Deletes the object behind `handle` if it is a live T; returns Get()'s error (currently always 0).
+    template <class T>
+    u32 Destroy(Handle handle) {
+        u32 error = 0;
+        if (Get<T>(handle, error)) {
+            occupied[handle - HANDLE_OFFSET] = false;
+            delete pool[handle - HANDLE_OFFSET];
+            pool[handle - HANDLE_OFFSET] = nullptr; // never leave a dangling pointer in the table
+        }
+        return error;
+    }
+
+    bool IsValid(Handle handle);
+
+    // Looks up `handle` as a T. Returns null, after logging, for a bad handle or a type mismatch.
+    template <class T>
+    T* Get(Handle handle, u32& outError) {
+        if (handle < HANDLE_OFFSET || handle >= HANDLE_OFFSET + MAX_COUNT || !occupied[handle - HANDLE_OFFSET]) {
+            // Stay quiet for 0 and 0x80020001: per the PPSSPP original, Tekken 6 spams the latter harmlessly.
+            if (handle != 0 && (u32)handle != 0x80020001) {
+                WARN_LOG(KERNEL, "Kernel: Bad object handle %i (%08x)", handle, handle);
+            }
+            outError = 0;//T::GetMissingErrorCode();
+            return 0;
+        } else {
+            // static_cast is unchecked (no dynamic_cast/RTTI), so the type is verified via GetHandleType().
+            T* t = static_cast<T*>(pool[handle - HANDLE_OFFSET]);
+            if (t == 0 || t->GetHandleType() != T::GetStaticHandleType()) {
+                WARN_LOG(KERNEL, "Kernel: Wrong object type for %i (%08x)", handle, handle);
+                outError = 0;//T::GetMissingErrorCode();
+                return 0;
+            }
+            outError = 0;//SCE_KERNEL_ERROR_OK;
+            return t;
+        }
+    }
+
+    // ONLY use this when you know the handle is valid.
+    template <class T>
+    T *GetFast(Handle handle) {
+        const Handle realHandle = handle - HANDLE_OFFSET;
+        // Handle is unsigned: a handle below HANDLE_OFFSET wraps around and fails the upper bound.
+        _dbg_assert_(KERNEL, realHandle < MAX_COUNT && occupied[realHandle]);
+        return static_cast<T*>(pool[realHandle]);
+    }
+
+    // Calls func(obj, arg) for every occupied slot whose object is a T, until func returns false.
+    template <class T, typename ArgT>
+    void Iterate(bool func(T*, ArgT), ArgT arg) {
+        const HandleType type = T::GetStaticHandleType();
+        for (int i = 0; i < MAX_COUNT; i++) {
+            if (!occupied[i] || pool[i]->GetHandleType() != type)
+                continue;
+            if (!func(static_cast<T*>(pool[i]), arg))
+                break;
+        }
+    }
+
+    // Writes the handle type of the object behind `handle` to *type; logs and returns false if invalid.
+    bool GetIDType(Handle handle, HandleType* type) const {
+        if ((handle < HANDLE_OFFSET) || (handle >= HANDLE_OFFSET + MAX_COUNT) ||
+            !occupied[handle - HANDLE_OFFSET]) {
+            ERROR_LOG(KERNEL, "Kernel: Bad object handle %i (%08x)", handle, handle);
+            return false;
+        }
+        *type = pool[handle - HANDLE_OFFSET]->GetHandleType();
+        return true;
+    }
+
+    Object* &operator [](Handle handle);
+    void List();
+    void Clear();
+    int GetCount();
+
+private:
+    enum {
+        MAX_COUNT       = 0x1000,   // number of slots in the table
+        HANDLE_OFFSET   = 0x100,    // handle value that maps to slot 0
+        INITIAL_NEXT_ID = 0x10,     // default range_bottom for Create()
+    };
+
+    Object* pool[MAX_COUNT];        // slot -> object; only meaningful where occupied[] is true
+    bool    occupied[MAX_COUNT];    // true while the slot holds a live object
+    int     next_id;
+};
+
+extern ObjectPool g_object_pool; // Shared pool instance; its definition lives outside this header
+
+/**
+ * Loads executable stored at specified address
+ * @param entry_point Entry point in memory of loaded executable
+ * @return True on success, otherwise false
+ */
+bool LoadExec(u32 entry_point);
+
+} // namespace
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
new file mode 100644
index 0000000000..019efbc785
--- /dev/null
+++ b/src/core/hle/kernel/mutex.cpp
@@ -0,0 +1,132 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#include