summary refs log tree commit diff stats
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r-- source/DeadlockDetect.cpp 155
-rw-r--r-- source/DeadlockDetect.h 70
-rw-r--r-- source/Root.cpp 17
3 files changed, 238 insertions, 4 deletions
diff --git a/source/DeadlockDetect.cpp b/source/DeadlockDetect.cpp
new file mode 100644
index 000000000..960038f81
--- /dev/null
+++ b/source/DeadlockDetect.cpp
@@ -0,0 +1,155 @@
+
+// DeadlockDetect.cpp
+
+// Implements the cDeadlockDetect class that tries to detect deadlocks and aborts the server when it detects one
+
+#include "Globals.h"
+#include "DeadlockDetect.h"
+#include "Root.h"
+#include "World.h"
+
+
+
+
+
+/// Duration of a single detection cycle, in milliseconds
+const int CYCLE_MILLISECONDS = 500;
+
+/// Threshold for the number of consecutive cycles with an unchanged world age; CheckWorldAge() compares its per-world counter against it
+const int NUM_CYCLES_LIMIT = (20 * 1000) / CYCLE_MILLISECONDS;  // twenty seconds' worth of cycles (= 40)
+
+
+
+
+
+cDeadlockDetect::cDeadlockDetect(void)
+	: super("DeadlockDetect")  // the string is handed to the cIsThread base; presumably the thread's name - confirm in IsThread.h
+{
+}
+
+
+
+
+
+bool cDeadlockDetect::Start(void)
+{
+	// Callback that stores each world's current age as the baseline for the later checks
+	class cBaselineRecorder :
+		public cWorldListCallback
+	{
+		cDeadlockDetect & m_Owner;
+
+	public:
+		cBaselineRecorder(cDeadlockDetect & a_Owner) :
+			m_Owner(a_Owner)
+		{
+		}
+
+		virtual bool Item(cWorld * a_World) override
+		{
+			m_Owner.SetWorldAge(a_World->GetName(), a_World->GetWorldAge());
+			return false;  // presumably tells ForEachWorld() to keep iterating - confirm against cRoot
+		}
+	} Recorder(*this);
+
+	cRoot::Get()->ForEachWorld(Recorder);
+	return super::Start();  // hand over to cIsThread::Start() only after the baseline has been recorded
+}
+
+
+
+
+
+void cDeadlockDetect::Stop(void)
+{
+ m_EvtTerminate.Set();  // signal the event that Execute() waits on, so that its loop ends
+ super::Stop();  // then let cIsThread perform its own Stop()
+}
+
+
+
+
+
+void cDeadlockDetect::Execute(void)
+{
+	// Callback that hands every world's current age to CheckWorldAge()
+	class cAgeChecker :
+		public cWorldListCallback
+	{
+		cDeadlockDetect & m_Owner;
+	public:
+		cAgeChecker(cDeadlockDetect & a_Owner) :
+			m_Owner(a_Owner)
+		{
+		}
+
+		virtual bool Item(cWorld * a_World) override
+		{
+			m_Owner.CheckWorldAge(a_World->GetName(), a_World->GetWorldAge());
+			return false;
+		}
+	} ;
+
+	// One pass per CYCLE_MILLISECONDS timeout; any other wait result (the event got signalled) ends the loop
+	while (m_EvtTerminate.Wait(CYCLE_MILLISECONDS) == cEvent::wrTimeout)
+	{
+		cAgeChecker Checker(*this);
+		cRoot::Get()->ForEachWorld(Checker);
+	}
+}
+
+
+
+
+
+void cDeadlockDetect::SetWorldAge(const AString & a_WorldName, Int64 a_Age)
+{
+	const sWorldAge Baseline = {a_Age, 0};  // age as given, unchanged-cycle counter reset to zero
+	m_WorldAges[a_WorldName] = Baseline;  // operator[] creates the entry when the world isn't tracked yet
+}
+
+
+
+// Compares a_Age with the age last recorded for the named world; calls DeadlockDetected() once the age has been unchanged for NUM_CYCLES_LIMIT cycles in a row
+void cDeadlockDetect::CheckWorldAge(const AString & a_WorldName, Int64 a_Age)
+{
+	WorldAges::iterator itr = m_WorldAges.find(a_WorldName);
+	if (itr == m_WorldAges.end())
+	{
+		// Only worlds registered through SetWorldAge() are tracked
+		ASSERT(!"Unknown world in cDeadlockDetect");
+		return;
+	}
+	sWorldAge & Stats = itr->second;
+	if (Stats.m_Age != a_Age)
+	{
+		// The age has changed since the last check: remember the new age and restart the count
+		Stats.m_Age = a_Age;
+		Stats.m_NumCyclesSame = 0;
+		return;
+	}
+	// Same age as in the previous cycle; hitting NUM_CYCLES_LIMIT such cycles counts as a deadlock
+	Stats.m_NumCyclesSame += 1;
+	if (Stats.m_NumCyclesSame >= NUM_CYCLES_LIMIT)
+	{
+		DeadlockDetected();
+	}
+}
+
+
+
+
+// Reaction to a detected deadlock: raises the ASSERT and then deliberately crashes the process
+void cDeadlockDetect::DeadlockDetected(void)
+{
+	ASSERT(!"Deadlock detected");
+
+	// TODO: Make a crashdump / coredump
+
+	// Crash the server intentionally; the volatile qualifier keeps the compiler from discarding the null write:
+	*(static_cast<volatile int *>(0)) = 0;
+}
+
+
+
+
diff --git a/source/DeadlockDetect.h b/source/DeadlockDetect.h
new file mode 100644
index 000000000..bbd76826a
--- /dev/null
+++ b/source/DeadlockDetect.h
@@ -0,0 +1,70 @@
+
+// DeadlockDetect.h
+
+// Declares the cDeadlockDetect class that tries to detect deadlocks and aborts the server when it detects one
+
+/*
+This class simply monitors each world's m_WorldAge, which is expected to grow on each tick.
+If the world age doesn't grow for several seconds, it's either because the server is super-overloaded,
+or because the world tick thread hangs in a deadlock. We presume the latter and therefore kill the server.
+Once we learn to write crashdumps programmatically, we should do so just before killing, to enable debugging.
+*/
+
+
+
+#pragma once
+
+#include "OSSupport/IsThread.h"
+
+
+
+
+
+class cDeadlockDetect :
+ public cIsThread
+{
+ typedef cIsThread super;
+
+public:
+ cDeadlockDetect(void);
+
+ /// Starts the detection: records every world's current age, then calls cIsThread's Start. Hides (does not override) cIsThread's Start
+ bool Start(void);
+
+ /// Stops the detection: signals m_EvtTerminate so that Execute() leaves its loop, then calls cIsThread's Stop. Hides cIsThread's Stop
+ void Stop(void);
+
+protected:
+ struct sWorldAge
+ {
+ /// World age recorded for this world by the most recent SetWorldAge() / CheckWorldAge() call
+ Int64 m_Age;
+
+ /// Number of consecutive checks in which the age was the same as the recorded one
+ int m_NumCyclesSame;
+ } ;
+
+ /// Maps world name -> sWorldAge
+ typedef std::map<AString, sWorldAge> WorldAges;
+ /// Tracking data per world; filled in by Start() through SetWorldAge(), updated by CheckWorldAge()
+ WorldAges m_WorldAges;
+ /// Set by Stop(); Execute() waits on it, with a timeout, between two rounds of checks
+ cEvent m_EvtTerminate;
+
+
+ // cIsThread overrides (Execute is the thread body: it checks all worlds once per cycle until m_EvtTerminate is signalled):
+ virtual void Execute(void) override;
+
+ /// Sets the initial world age for the named world and resets its unchanged-cycle counter
+ void SetWorldAge(const AString & a_WorldName, Int64 a_Age);
+
+ /// Checks if the world's age has changed, updates the world's stats; calls DeadlockDetected() if the age has stayed the same for too many consecutive cycles
+ void CheckWorldAge(const AString & a_WorldName, Int64 a_Age);
+
+ /// Called when a deadlock is detected. Aborts the server by crashing it on purpose (no crashdump is written yet)
+ void DeadlockDetected(void);
+} ;
+
+
+
+
diff --git a/source/Root.cpp b/source/Root.cpp
index 166932cf2..07de0775c 100644
--- a/source/Root.cpp
+++ b/source/Root.cpp
@@ -16,6 +16,7 @@
#include "Chunk.h"
#include "Protocol/ProtocolRecognizer.h" // for protocol version constants
#include "CommandOutput.h"
+#include "DeadlockDetect.h"
#include "../iniFile/iniFile.h"
@@ -90,6 +91,7 @@ void cRoot::InputThread(void * a_Params)
void cRoot::Start(void)
{
+ cDeadlockDetect dd;
delete m_Log;
m_Log = new cMCLogger();
@@ -162,6 +164,9 @@ void cRoot::Start(void)
LOG("Starting worlds...");
StartWorlds();
+ LOG("Starting deadlock detector...");
+ dd.Start();
+
LOG("Starting server...");
m_Server->Start();
@@ -183,17 +188,21 @@ void cRoot::Start(void)
// Deallocate stuffs
LOG("Shutting down server...");
- m_Server->Shutdown(); // This waits for threads to stop and d/c clients
+ m_Server->Shutdown();
+
+ LOG("Shutting down deadlock detector...");
+ dd.Stop();
+
LOG("Stopping world threads...");
StopWorlds();
+
LOG("Stopping authenticator...");
m_Authenticator.Stop();
-
LOG("Freeing MonsterConfig...");
- delete m_MonsterConfig; m_MonsterConfig = 0;
+ delete m_MonsterConfig; m_MonsterConfig = NULL;
LOG("Stopping WebAdmin...");
- delete m_WebAdmin; m_WebAdmin = 0;
+ delete m_WebAdmin; m_WebAdmin = NULL;
LOG("Unloading recipes...");
delete m_FurnaceRecipe; m_FurnaceRecipe = NULL;
delete m_CraftingRecipes; m_CraftingRecipes = NULL;