From 57634937a40e3d655037eb5f2115d855d756221b Mon Sep 17 00:00:00 2001
From: miclebrick <miclebrick@outlook.com>
Date: Wed, 8 Aug 2018 15:30:52 -0400
Subject: [PATCH] Add Early Warning Feature to WatchDog

Detect when the server has been hung for a long duration, and start printing
thread dumps at an interval until the point of crash.

This will help diagnose what was going on in that time before the crash.

diff --git a/src/main/java/com/destroystokyo/paper/PaperConfig.java b/src/main/java/com/destroystokyo/paper/PaperConfig.java
index 046bf33f6..b23d45f7d 100644
--- a/src/main/java/com/destroystokyo/paper/PaperConfig.java
+++ b/src/main/java/com/destroystokyo/paper/PaperConfig.java
@@ -24,6 +24,8 @@ import org.bukkit.configuration.InvalidConfigurationException;
 import org.bukkit.configuration.file.YamlConfiguration;
 import co.aikar.timings.Timings;
 import co.aikar.timings.TimingsManager;
+import org.spigotmc.SpigotConfig;
+import org.spigotmc.WatchdogThread;
 
 public class PaperConfig {
 
@@ -328,6 +330,14 @@ public class PaperConfig {
         }
     }
 
+    public static int watchdogPrintEarlyWarningEvery = 5000;
+    public static int watchdogPrintEarlyWarningDelay = 10000;
+    private static void watchdogEarlyWarning() {
+        watchdogPrintEarlyWarningEvery = getInt("settings.watchdog.early-warning-every", 5000);
+        watchdogPrintEarlyWarningDelay = getInt("settings.watchdog.early-warning-delay", 10000);
+        WatchdogThread.doStart(SpigotConfig.timeoutTime, SpigotConfig.restartOnCrash );
+    }
+
     public static int tabSpamIncrement = 2;
     public static int tabSpamLimit = 500;
     private static void tabSpamLimiters() {
diff --git a/src/main/java/net/minecraft/server/MinecraftServer.java b/src/main/java/net/minecraft/server/MinecraftServer.java
index 74c97b5db..5fdd65b08 100644
--- a/src/main/java/net/minecraft/server/MinecraftServer.java
+++ b/src/main/java/net/minecraft/server/MinecraftServer.java
@@ -768,6 +768,7 @@ public abstract class MinecraftServer implements IAsyncTaskHandler, IMojangStati
                 this.a(this.n);
 
                 // Spigot start
+                org.spigotmc.WatchdogThread.hasStarted = true; // Paper
                 Arrays.fill( recentTps, 20 );
                 long start = System.nanoTime(), lastTick = start - TICK_TIME, catchupTime = 0, curTime, wait, tickSection = start; // Paper - Further improve server tick loop
                 while (this.isRunning) {
diff --git a/src/main/java/org/spigotmc/SpigotConfig.java b/src/main/java/org/spigotmc/SpigotConfig.java
index 4ac0ab997..0c45d75ad 100644
--- a/src/main/java/org/spigotmc/SpigotConfig.java
+++ b/src/main/java/org/spigotmc/SpigotConfig.java
@@ -226,7 +226,7 @@ public class SpigotConfig
         restartScript = getString( "settings.restart-script", restartScript );
         restartMessage = transform( getString( "messages.restart", "Server is restarting" ) );
         commands.put( "restart", new RestartCommand( "restart" ) );
-        WatchdogThread.doStart( timeoutTime, restartOnCrash );
+        //WatchdogThread.doStart( timeoutTime, restartOnCrash ); // Paper - moved to PaperConfig
     }
 
     public static boolean bungee;
diff --git a/src/main/java/org/spigotmc/WatchdogThread.java b/src/main/java/org/spigotmc/WatchdogThread.java
index 57a4748a3..19df383e0 100644
--- a/src/main/java/org/spigotmc/WatchdogThread.java
+++ b/src/main/java/org/spigotmc/WatchdogThread.java
@@ -5,6 +5,7 @@ import java.lang.management.MonitorInfo;
 import java.lang.management.ThreadInfo;
 import java.util.logging.Level;
 import java.util.logging.Logger;
+import com.destroystokyo.paper.PaperConfig;
 import net.minecraft.server.MinecraftServer;
 import org.bukkit.Bukkit;
 
@@ -13,6 +14,10 @@ public class WatchdogThread extends Thread
 
     private static WatchdogThread instance;
     private final long timeoutTime;
+    private final long earlyWarningEvery; // Paper - Timeout time for just printing a dump but not restarting
+    private final long earlyWarningDelay; // Paper
+    public static volatile boolean hasStarted; // Paper
+    private long lastEarlyWarning; // Paper - Keep track of short dump times to avoid spamming console with short dumps
     private final boolean restart;
     private volatile long lastTick;
     private volatile boolean stopping;
@@ -22,6 +27,8 @@ public class WatchdogThread extends Thread
         super( "Paper Watchdog Thread" );
         this.timeoutTime = timeoutTime;
         this.restart = restart;
+        earlyWarningEvery = Math.min(PaperConfig.watchdogPrintEarlyWarningEvery, timeoutTime); // Paper
+        earlyWarningDelay = Math.min(PaperConfig.watchdogPrintEarlyWarningDelay, timeoutTime); // Paper
     }
 
     public static void doStart(int timeoutTime, boolean restart)
@@ -51,10 +58,19 @@ public class WatchdogThread extends Thread
     {
         while ( !stopping )
         {
-            //
-            if ( lastTick != 0 && System.currentTimeMillis() > lastTick + timeoutTime && !Boolean.getBoolean("disable.watchdog")) // Paper - Add property to disable
+            // Paper start
+            long currentTime = System.currentTimeMillis();
+            if ( lastTick != 0 && currentTime > lastTick + earlyWarningEvery && !Boolean.getBoolean("disable.watchdog") )
             {
+                boolean isLongTimeout = currentTime > lastTick + timeoutTime;
+                // Don't spam early warning dumps
+                if ( !isLongTimeout && (earlyWarningEvery <= 0 || !hasStarted || currentTime < lastEarlyWarning + earlyWarningEvery || currentTime < lastTick + earlyWarningDelay)) continue;
+                lastEarlyWarning = currentTime;
+                // Paper end
                 Logger log = Bukkit.getServer().getLogger();
+                // Paper start - Different message when it's a short timeout
+                if ( isLongTimeout )
+                {
                 log.log( Level.SEVERE, "The server has stopped responding!" );
                 log.log( Level.SEVERE, "Please report this to https://github.com/PaperMC/Paper/issues" );
                 log.log( Level.SEVERE, "Be sure to include ALL relevant console errors and Minecraft crash reports" );
@@ -79,29 +95,46 @@ public class WatchdogThread extends Thread
                     }
                 }
                 // Paper end
+                } else
+                {
+                    log.log(Level.SEVERE, "--- DO NOT REPORT THIS TO PAPER - THIS IS NOT A BUG OR A CRASH ---");
+                    log.log(Level.SEVERE, "The server has not responded for " + (currentTime - lastTick) / 1000 + " seconds! Creating thread dump");
+                }
+                // Paper end - Different message for short timeout
                 log.log( Level.SEVERE, "------------------------------" );
                 log.log( Level.SEVERE, "Server thread dump (Look for plugins here before reporting to Paper!):" );
                 dumpThread( ManagementFactory.getThreadMXBean().getThreadInfo( MinecraftServer.getServer().primaryThread.getId(), Integer.MAX_VALUE ), log );
                 log.log( Level.SEVERE, "------------------------------" );
                 //
+                // Paper start - Only print full dump on long timeouts
+                if ( isLongTimeout )
+                {
                 log.log( Level.SEVERE, "Entire Thread Dump:" );
                 ThreadInfo[] threads = ManagementFactory.getThreadMXBean().dumpAllThreads( true, true );
                 for ( ThreadInfo thread : threads )
                 {
                     dumpThread( thread, log );
                 }
+                } else {
+                    log.log(Level.SEVERE, "--- DO NOT REPORT THIS TO PAPER - THIS IS NOT A BUG OR A CRASH ---");
+                }
+
+
                 log.log( Level.SEVERE, "------------------------------" );
 
+                if ( isLongTimeout )
+                {
                 if ( restart )
                 {
                     RestartCommand.restart();
                 }
                 break;
+                } // Paper end
             }
 
             try
             {
-                sleep( 10000 );
+                sleep( 1000 ); // Paper - Reduce check time to every second instead of every ten seconds, more consistent and allows for short timeout
             } catch ( InterruptedException ex )
             {
                 interrupt();
-- 
2.18.0