Skip to content

Commit f8fcc65

Browse files
Adding restart reason (#11191)
* Improving logging to capture a reason for JobHost restarts.
1 parent 10c8042 commit f8fcc65

File tree

20 files changed

+102
-75
lines changed

20 files changed

+102
-75
lines changed

release_notes.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
### Release notes
2-
3-
<!-- Please add your release notes in the following format:
4-
- My change description (#PR)
5-
-->
6-
- Add JitTrace Files for v4.1042
7-
- Updating OTel related nuget packages (#11216)
8-
- Moving to version 1.5.7 of Microsoft.Azure.AppService.Middleware.Functions (https://github.com/Azure/azure-functions-host/pull/11231)
9-
- Refactor code to move the logic to search for WorkerConfigs to a default worker configuration resolver (#11219)
10-
- Update Node.js Worker Version to [3.11.0](https://github.com/Azure/azure-functions-nodejs-worker/releases/tag/v3.11.0)
11-
1+
### Release notes
2+
3+
<!-- Please add your release notes in the following format:
4+
- My change description (#PR)
5+
-->
6+
- Add JitTrace Files for v4.1042
7+
- Updating OTel related nuget packages (#11216)
8+
- Moving to version 1.5.7 of Microsoft.Azure.AppService.Middleware.Functions (https://github.com/Azure/azure-functions-host/pull/11231)
9+
- Refactor code to move the logic to search for WorkerConfigs to a default worker configuration resolver (#11219)
10+
- Update Node.js Worker Version to [3.11.0](https://github.com/Azure/azure-functions-nodejs-worker/releases/tag/v3.11.0)
11+
- Adding restart reason to the logs (#11191)

src/WebJobs.Script.Grpc/Channel/GrpcWorkerChannel.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ internal GrpcWorkerChannel(
140140
_eventSubscriptions.Add(_eventManager.OfType<FileEvent>()
141141
.Where(msg => _workerConfig.Description.Extensions.Contains(Path.GetExtension(msg.FileChangeArguments.FullPath)))
142142
.Throttle(TimeSpan.FromMilliseconds(300)) // debounce
143-
.Subscribe(msg => _eventManager.Publish(new HostRestartEvent())));
143+
.Subscribe(msg => _eventManager.Publish(new HostRestartEvent($"Worker monitored file changed: '{msg.FileChangeArguments.Name}'."))));
144144

145145
_startLatencyMetric = metricsLogger?.LatencyEvent(string.Format(MetricEventNames.WorkerInitializeLatency, workerConfig.Description.Language, attemptCount));
146146

src/WebJobs.Script.WebHost/Controllers/HostController.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ public async Task<IActionResult> Resume([FromServices] IScriptHostManager script
290290
if (drainModeManager.IsDrainModeEnabled)
291291
{
292292
_logger.LogDebug("Starting a new host");
293-
await scriptHostManager.RestartHostAsync();
293+
await scriptHostManager.RestartHostAsync("Resuming from drain mode.", CancellationToken.None);
294294
}
295295

296296
var status = new ResumeStatus { State = scriptHostManager.State };
@@ -451,7 +451,7 @@ public async Task<IActionResult> GetTriggers()
451451
[Authorize(Policy = PolicyNames.AdminAuthLevel)]
452452
public IActionResult Restart([FromServices] IScriptHostManager hostManager)
453453
{
454-
Task ignore = hostManager.RestartHostAsync();
454+
Task ignore = hostManager.RestartHostAsync("Restart API called.");
455455

456456
var hostOptionsValue = _applicationHostOptions.Value;
457457
var response = new HostRestartResponse()

src/WebJobs.Script.WebHost/Diagnostics/Extensions/ScriptHostServiceLoggerExtension.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,11 @@ public static class ScriptHostServiceLoggerExtension
126126
new EventId(518, nameof(ActiveHostChanging)),
127127
"Active host changing from '{oldHostInstanceId}' to '{newHostInstanceId}'.");
128128

129-
private static readonly Action<ILogger, Exception> _enteringRestart =
130-
LoggerMessage.Define(
129+
private static readonly Action<ILogger, string, Exception> _enteringRestart =
130+
LoggerMessage.Define<string>(
131131
LogLevel.Debug,
132132
new EventId(519, nameof(EnteringRestart)),
133-
"Restart requested.");
133+
"Restart requested. Reason '{restartReason}'");
134134

135135
private static readonly Action<ILogger, Exception> _restartBeforeStart =
136136
LoggerMessage.Define(
@@ -321,9 +321,9 @@ public static void ActiveHostChanging(this ILogger logger, string oldHostInstanc
321321
_activeHostChanging(logger, oldHostInstanceId, newHostInstanceId, null);
322322
}
323323

324-
public static void EnteringRestart(this ILogger logger)
324+
public static void EnteringRestart(this ILogger logger, string reason)
325325
{
326-
_enteringRestart(logger, null);
326+
_enteringRestart(logger, reason, null);
327327
}
328328

329329
public static void RestartBeforeStart(this ILogger logger)

src/WebJobs.Script.WebHost/FileMonitoringService.cs

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public class FileMonitoringService : IFileMonitoringService, IDisposable
3232
private readonly ILogger _logger;
3333
private readonly ILogger<FileMonitoringService> _typedLogger;
3434
private readonly IList<IDisposable> _eventSubscriptions = new List<IDisposable>();
35-
private readonly Func<Task> _restart;
35+
private readonly Func<string, Task> _restart;
3636
private readonly Action _shutdown;
3737
private readonly ImmutableArray<string> _rootDirectorySnapshot;
3838
private AutoRecoveringFileSystemWatcher _debugModeFileWatcher;
@@ -112,27 +112,27 @@ private void ResumeRestartIfScheduled()
112112
using (System.Threading.ExecutionContext.SuppressFlow())
113113
{
114114
_typedLogger.LogDebug("Resuming scheduled restart.");
115-
Task.Run(async () => await ScheduleRestartAsync());
115+
Task.Run(async () => await ScheduleRestartAsync("Resuming scheduled restart."));
116116
}
117117
}
118118
}
119119

120-
private async Task ScheduleRestartAsync(bool shutdown)
120+
private async Task ScheduleRestartAsync(string reason, bool shutdown)
121121
{
122122
_restartScheduled = true;
123123
if (shutdown)
124124
{
125125
_shutdownScheduled = true;
126126
}
127127

128-
await ScheduleRestartAsync();
128+
await ScheduleRestartAsync(reason);
129129
}
130130

131-
private async Task ScheduleRestartAsync()
131+
private async Task ScheduleRestartAsync(string reason)
132132
{
133133
if (Interlocked.Read(ref _suspensionRequestsCount) > 0)
134134
{
135-
_logger.LogDebug("Restart requested while currently suspended. Ignoring request.");
135+
_logger.LogDebug("Restart requested while currently suspended. Reason: '{Reason}'. Ignoring request.", reason);
136136
}
137137
else
138138
{
@@ -142,7 +142,7 @@ private async Task ScheduleRestartAsync()
142142
}
143143
else
144144
{
145-
await _restart();
145+
await _restart(reason);
146146
}
147147
}
148148
}
@@ -164,14 +164,18 @@ private void InitializeFileWatchers()
164164
}
165165

166166
_eventSubscriptions.Add(_eventManager.OfType<HostRestartEvent>()
167-
.Subscribe((msg) => ScheduleRestartAsync(false)
168-
.ContinueWith(t => _logger.LogCritical(t.Exception.Message),
169-
TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously)));
167+
.Subscribe(HandleHostRestartEvent));
170168

171169
// Delay starting up for logging and debug file watchers to avoid long start up times
172170
Utility.ExecuteAfterColdStartDelay(_environment, InitializeSecondaryFileWatchers);
173171
}
174172

173+
private void HandleHostRestartEvent(HostRestartEvent restartEvent)
174+
{
175+
ScheduleRestartAsync($"Handling {nameof(HostRestartEvent)} ('{restartEvent.Name}':'{restartEvent.Source}'). Reason: {restartEvent.Reason}", false)
176+
.ContinueWith(t => _logger.LogCritical(t.Exception?.Message), TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously);
177+
}
178+
175179
/// <summary>
176180
/// Initializes the file and directory monitoring that does not need to happen as part of a Host startup
177181
/// These watchers can be started after a delay to avoid startup performance issue
@@ -293,7 +297,8 @@ private void OnFileChanged(FileSystemEventArgs e)
293297
changeDescription = "File";
294298
if (File.Exists(e.FullPath))
295299
{
296-
TraceFileChangeRestart(changeDescription, e.ChangeType.ToString(), e.FullPath, isShutdown: true);
300+
string fileChangeMsg = string.Format(CultureInfo.InvariantCulture, "{0} change of type '{1}' detected for '{2}'", changeDescription, e.ChangeType.ToString(), e.FullPath);
301+
TraceFileChangeRestart(fileChangeMsg, isShutdown: true);
297302
Shutdown();
298303
}
299304
}
@@ -316,15 +321,16 @@ private void OnFileChanged(FileSystemEventArgs e)
316321
shutdown = true;
317322
}
318323

319-
TraceFileChangeRestart(changeDescription, e.ChangeType.ToString(), e.FullPath, shutdown);
320-
ScheduleRestartAsync(shutdown).ContinueWith(t => _logger.LogError(t.Exception, $"Error restarting host (full shutdown: {shutdown})"),
321-
TaskContinuationOptions.ExecuteSynchronously | TaskContinuationOptions.OnlyOnFaulted);
324+
string fileChangeMsg = string.Format(CultureInfo.InvariantCulture, "{0} change of type '{1}' detected for '{2}'", changeDescription, e.ChangeType.ToString(), e.FullPath);
325+
TraceFileChangeRestart(fileChangeMsg, shutdown);
326+
ScheduleRestartAsync(fileChangeMsg, shutdown)
327+
.ContinueWith(t => _logger.LogError(t.Exception, $"Error restarting host (full shutdown: {shutdown})"),
328+
TaskContinuationOptions.ExecuteSynchronously | TaskContinuationOptions.OnlyOnFaulted);
322329
}
323330
}
324331

325-
private void TraceFileChangeRestart(string changeDescription, string changeType, string path, bool isShutdown)
332+
private void TraceFileChangeRestart(string fileChangeMsg, bool isShutdown)
326333
{
327-
string fileChangeMsg = string.Format(CultureInfo.InvariantCulture, "{0} change of type '{1}' detected for '{2}'", changeDescription, changeType, path);
328334
_logger.LogInformation(fileChangeMsg);
329335

330336
string action = isShutdown ? "shutdown" : "restart";
@@ -349,11 +355,11 @@ internal static string GetRelativeDirectory(string path, string scriptRoot)
349355
return string.Empty;
350356
}
351357

352-
private Task RestartAsync()
358+
private Task RestartAsync(string reason)
353359
{
354360
if (!_shutdownScheduled && Interlocked.Exchange(ref _restartRequested, 1) == 0)
355361
{
356-
return _scriptHostManager.RestartHostAsync();
362+
return _scriptHostManager.RestartHostAsync(reason);
357363
}
358364

359365
return Task.CompletedTask;

src/WebJobs.Script.WebHost/Middleware/HostWarmupMiddleware.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ public async Task HostWarmupAsync(HttpRequest request)
158158
{
159159
if (request.Query.TryGetValue("restart", out StringValues value) && string.Compare("1", value) == 0)
160160
{
161-
await _hostManager.RestartHostAsync(CancellationToken.None);
161+
await _hostManager.RestartHostAsync("Host warmup call requested a restart.", CancellationToken.None);
162162

163163
// This call is here for sanity, but we should be fully initialized.
164164
await _hostManager.DelayUntilHostReady();

src/WebJobs.Script.WebHost/Standby/StandbyManager.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ public Task SpecializeHostAsync()
7878
if (t.IsFaulted)
7979
{
8080
// if we fail during specialization for whatever reason
81-
// this is fatal, so we shutdown
81+
// this is fatal, so we stop the application.
8282
_logger.LogError(t.Exception, "Specialization failed. Shutting down.");
8383
_applicationLifetime.StopApplication();
8484
}
@@ -119,7 +119,7 @@ public async Task SpecializeHostCoreAsync()
119119

120120
using (_metricsLogger.LatencyEvent(MetricEventNames.SpecializationRestartHost))
121121
{
122-
await _scriptHostManager.RestartHostAsync();
122+
await _scriptHostManager.RestartHostAsync("Host specialization.");
123123
}
124124

125125
using (_metricsLogger.LatencyEvent(MetricEventNames.SpecializationDelayUntilHostReady))

src/WebJobs.Script.WebHost/WebJobsScriptHostService.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -569,8 +569,10 @@ public async Task StopAsync(CancellationToken cancellationToken)
569569
State = ScriptHostState.Stopped;
570570
}
571571

572-
public async Task RestartHostAsync(CancellationToken cancellationToken)
572+
public async Task RestartHostAsync(string reason, CancellationToken cancellationToken)
573573
{
574+
ArgumentException.ThrowIfNullOrWhiteSpace(reason);
575+
574576
if (ShutdownRequested)
575577
{
576578
return;
@@ -586,7 +588,7 @@ public async Task RestartHostAsync(CancellationToken cancellationToken)
586588
await _hostStarted;
587589
}
588590

589-
_logger.EnteringRestart();
591+
_logger.EnteringRestart(reason);
590592

591593
// If anything is mid-startup, cancel it.
592594
_startupLoopTokenSource?.Cancel();
@@ -830,7 +832,7 @@ private void OnHostHealthCheckTimer(object state)
830832
// fail and we'll enter a restart loop (exponentially backing off)
831833
// until the host is healthy again and we can resume host processing.
832834
_logger.UnhealthyRestart();
833-
var tIgnore = RestartHostAsync(CancellationToken.None);
835+
var tIgnore = RestartHostAsync("Host health check detected an unhealthy host.", CancellationToken.None);
834836
}
835837
}
836838

src/WebJobs.Script/Description/DotNet/DotNetFunctionInvoker.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,13 @@ private async Task ReloadScriptAsync()
176176

177177
// If the compilation succeeded, AND:
178178
// - We haven't cached a function (failed to compile on load), OR
179-
// - We're referencing local function types (i.e. POCOs defined in the function) AND Our our function signature has changed
179+
// - We're referencing local function types (i.e. POCOs defined in the function) AND our function signature has changed
180180
// Restart our host.
181181
if (compilationSucceeded &&
182182
(_functionSignature == null ||
183183
(_functionSignature.HasLocalTypeReference || !_functionSignature.Equals(signature))))
184184
{
185-
await Host.RestartAsync().ConfigureAwait(false);
185+
await Host.RestartAsync(".NET compilation.").ConfigureAwait(false);
186186
}
187187
}
188188

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,20 @@
11
// Copyright (c) .NET Foundation. All rights reserved.
22
// Licensed under the MIT License. See License.txt in the project root for license information.
33

4+
using System;
5+
46
namespace Microsoft.Azure.WebJobs.Script.Eventing
57
{
68
public class HostRestartEvent : ScriptEvent
79
{
8-
public HostRestartEvent()
10+
public HostRestartEvent(string reason)
911
: base(nameof(HostRestartEvent), EventSources.Worker)
1012
{
13+
ArgumentException.ThrowIfNullOrWhiteSpace(reason);
14+
15+
Reason = reason;
1116
}
17+
18+
public string Reason { get; }
1219
}
1320
}

0 commit comments

Comments
 (0)