Skip to content

request interception #930

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: nonblocking_libcurl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/browser/ScriptManager.zig
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ deferreds: OrderList,

shutdown: bool = false,


client: *HttpClient,
allocator: Allocator,
buffer_pool: BufferPool,
Expand Down Expand Up @@ -230,6 +229,7 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
.url = remote_url.?,
.ctx = pending_script,
.method = .GET,
.headers = try HttpClient.Headers.init(),
.cookie = page.requestCookie(.{}),
.start_callback = if (log.enabled(.http, .debug)) startCallback else null,
.header_done_callback = headerCallback,
Expand Down Expand Up @@ -293,6 +293,7 @@ pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult {
try client.blockingRequest(.{
.url = url,
.method = .GET,
.headers = try HttpClient.Headers.init(),
.ctx = &blocking,
.cookie = self.page.requestCookie(.{}),
.start_callback = if (log.enabled(.http, .debug)) Blocking.startCallback else null,
Expand Down
3 changes: 3 additions & 0 deletions src/browser/browser.zig
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ pub const Browser = struct {
errdefer env.deinit();

const notification = try Notification.init(allocator, app.notification);
app.http.client.notification = notification;
app.http.client.next_request_id = 0; // Should we track ids in CDP only?
errdefer notification.deinit();

return .{
Expand All @@ -74,6 +76,7 @@ pub const Browser = struct {
self.page_arena.deinit();
self.session_arena.deinit();
self.transfer_arena.deinit();
self.http_client.notification = null;
self.notification.deinit();
self.state_pool.deinit();
}
Expand Down
5 changes: 4 additions & 1 deletion src/browser/page.zig
Original file line number Diff line number Diff line change
Expand Up @@ -433,12 +433,15 @@ pub const Page = struct {
const owned_url = try self.arena.dupeZ(u8, request_url);
self.url = try URL.parse(owned_url, null);

var headers = try HttpClient.Headers.init();
if (opts.header) |hdr| try headers.add(hdr);

self.http_client.request(.{
.ctx = self,
.url = owned_url,
.method = opts.method,
.headers = headers,
.body = opts.body,
.header = opts.header,
.cookie = self.requestCookie(.{ .is_navigation = true }),
.header_done_callback = pageHeaderDoneCallback,
.data_callback = pageDataCallback,
Expand Down
11 changes: 6 additions & 5 deletions src/browser/xhr/xhr.zig
Original file line number Diff line number Diff line change
Expand Up @@ -370,10 +370,16 @@ pub const XMLHttpRequest = struct {
}
}

var headers = try HttpClient.Headers.init();
for (self.headers.items) |hdr| {
try headers.add(hdr);
}

try page.http_client.request(.{
.ctx = self,
.url = self.url.?,
.method = self.method,
.headers = headers,
.body = self.request_body,
.cookie = page.requestCookie(.{}),
.start_callback = httpStartCallback,
Expand All @@ -387,11 +393,6 @@ pub const XMLHttpRequest = struct {

/// Start callback for an XHR transfer.
///
/// Recovers the owning `XMLHttpRequest` from `transfer.ctx`, logs the request
/// start and remembers the transfer on the XHR object.
///
/// Request headers are not added here: they are collected into
/// `HttpClient.Headers` and passed through the `.headers` field when the
/// request is issued, so adding them again on the transfer would duplicate
/// them.
fn httpStartCallback(transfer: *HttpClient.Transfer) !void {
    const self: *XMLHttpRequest = @alignCast(@ptrCast(transfer.ctx));
    log.debug(.http, "request start", .{ .method = self.method, .url = self.url, .source = "xhr" });
    self.transfer = transfer;
}
Expand Down
21 changes: 20 additions & 1 deletion src/cdp/cdp.zig
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const Page = @import("../browser/page.zig").Page;
const Inspector = @import("../browser/env.zig").Env.Inspector;
const Incrementing = @import("../id.zig").Incrementing;
const Notification = @import("../notification.zig").Notification;
const InterceptState = @import("domains/fetch.zig").InterceptState;

const polyfill = @import("../browser/polyfill/polyfill.zig");

Expand Down Expand Up @@ -75,6 +76,8 @@ pub fn CDPT(comptime TypeProvider: type) type {
// Extra headers to add to all requests. TBD under which conditions this should be reset.
extra_headers: std.ArrayListUnmanaged(std.http.Header) = .empty,

intercept_state: InterceptState,

const Self = @This();

pub fn init(app: *App, client: TypeProvider.Client) !Self {
Expand All @@ -89,13 +92,15 @@ pub fn CDPT(comptime TypeProvider: type) type {
.browser_context = null,
.message_arena = std.heap.ArenaAllocator.init(allocator),
.notification_arena = std.heap.ArenaAllocator.init(allocator),
.intercept_state = try InterceptState.init(allocator), // TBD or browser session arena?
};
}

pub fn deinit(self: *Self) void {
if (self.browser_context) |*bc| {
bc.deinit();
}
self.intercept_state.deinit(); // TBD Should this live in BC?
self.browser.deinit();
self.message_arena.deinit();
self.notification_arena.deinit();
Expand Down Expand Up @@ -451,6 +456,14 @@ pub fn BrowserContext(comptime CDP_T: type) type {
self.cdp.browser.notification.unregister(.http_request_complete, self);
}

/// Registers `onHttpRequestIntercept` on the browser's notification object for
/// `.http_request_intercept` events, with this browser context as the
/// listener. Propagates any error returned by `register`.
pub fn fetchEnable(self: *Self) !void {
try self.cdp.browser.notification.register(.http_request_intercept, self, onHttpRequestIntercept);
}

/// Unregisters this browser context from `.http_request_intercept`
/// notifications (the counterpart of `fetchEnable`).
pub fn fetchDisable(self: *Self) void {
self.cdp.browser.notification.unregister(.http_request_intercept, self);
}

pub fn onPageRemove(ctx: *anyopaque, _: Notification.PageRemove) !void {
const self: *Self = @alignCast(@ptrCast(ctx));
return @import("domains/page.zig").pageRemove(self);
Expand All @@ -475,7 +488,13 @@ pub fn BrowserContext(comptime CDP_T: type) type {
/// Notification callback receiving `Notification.RequestStart` data.
///
/// `ctx` is cast back to the browser context that was registered as the
/// listener. The work is delegated to the network domain's `httpRequestStart`,
/// using the notification arena for temporary allocations; the arena is reset
/// when this function returns, whether or not the handler failed.
pub fn onHttpRequestStart(ctx: *anyopaque, data: *const Notification.RequestStart) !void {
    const self: *Self = @alignCast(@ptrCast(ctx));
    defer self.resetNotificationArena();
    try @import("domains/network.zig").httpRequestStart(self.notification_arena, self, data);
}

/// Notification callback receiving `Notification.RequestIntercept` data
/// (registered by `fetchEnable`).
///
/// `ctx` is cast back to the browser context that was registered as the
/// listener, and the work is delegated to the fetch domain's `requestPaused`.
pub fn onHttpRequestIntercept(ctx: *anyopaque, data: *const Notification.RequestIntercept) !void {
const self: *Self = @alignCast(@ptrCast(ctx));
// Reset the notification arena on every exit path, including errors.
defer self.resetNotificationArena();
try @import("domains/fetch.zig").requestPaused(self.notification_arena, self, data);
}

pub fn onHttpRequestFail(ctx: *anyopaque, data: *const Notification.RequestFail) !void {
Expand Down
202 changes: 200 additions & 2 deletions src/cdp/domains/fetch.zig
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2023-2024 Lightpanda (Selecy SAS)
// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
Expand All @@ -17,13 +17,211 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.

const std = @import("std");
const Allocator = std.mem.Allocator;
const Notification = @import("../../notification.zig").Notification;
const log = @import("../../log.zig");
const Request = @import("../../http/Client.zig").Request;
const Method = @import("../../http/Client.zig").Method;

/// Dispatches a Fetch-domain CDP command to its handler.
///
/// The handler is selected from `cmd.input.action`. Returns
/// `error.UnknownMethod` when the action is not one of the commands listed in
/// the enum below; otherwise returns whatever the selected handler returns.
pub fn processMessage(cmd: anytype) !void {
    const action = std.meta.stringToEnum(enum {
        disable,
        enable,
        continueRequest,
        failRequest,
    }, cmd.input.action) orelse return error.UnknownMethod;

    // Exhaustive on purpose: adding a tag above without a handler here is a
    // compile error.
    switch (action) {
        .disable => return disable(cmd),
        .enable => return enable(cmd),
        .continueRequest => return continueRequest(cmd),
        .failRequest => return failRequest(cmd),
    }
}

/// Requests that have been paused by the Fetch domain, keyed by request id.
/// An instance of this struct is stored on the CDP struct.
pub const InterceptState = struct {
    const Self = @This();
    const WaitingMap = std.AutoArrayHashMap(u64, Request);

    /// Paused requests, waiting for the client to continue or fail them.
    waiting: WaitingMap,

    /// Creates an empty state whose map allocates from `allocator`.
    pub fn init(allocator: Allocator) !InterceptState {
        return Self{ .waiting = WaitingMap.init(allocator) };
    }

    /// Releases the map's storage.
    pub fn deinit(self: *Self) void {
        self.waiting.deinit();
    }
};

/// One entry of `EnableParam.patterns`, as received in the params of
/// `Fetch.enable`. Field names are the JSON keys and must not be renamed.
/// NOTE: `enable` currently only logs a warning when patterns are supplied;
/// nothing in this file matches requests against them yet.
const RequestPattern = struct {
urlPattern: []const u8 = "*", // Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
// Optional resource-type filter; null when omitted.
resourceType: ?ResourceType = null,
// Defaults to the request stage when omitted.
requestStage: RequestStage = .Request,
};
/// Resource type names used by the Fetch domain: the optional filter in
/// `RequestPattern.resourceType` and the `resourceType` field of the
/// `Fetch.requestPaused` event (which currently always sends `Document`).
/// Tag names are the wire values and must not be renamed.
const ResourceType = enum {
Document,
Stylesheet,
Image,
Media,
Font,
Script,
TextTrack,
XHR,
Fetch,
Prefetch,
EventSource,
WebSocket,
Manifest,
SignedExchange,
Ping,
CSPViolationReport,
Preflight,
FedCM,
Other,
};
/// Stage selector used by `RequestPattern.requestStage` (default `.Request`).
/// Tag names are the wire values and must not be renamed.
const RequestStage = enum {
Request,
Response,
};

/// Params of `Fetch.enable`. Both fields are optional on the wire; `enable`
/// also substitutes an all-defaults value when the command carries no params.
const EnableParam = struct {
// Not implemented yet: `enable` only warns when this is non-empty.
patterns: []RequestPattern = &.{},
// Not implemented yet: `enable` only warns when this is true.
handleAuthRequests: bool = false,
};
/// Accepted values for the `errorReason` param of `Fetch.failRequest`.
/// In this file the value is only logged by `failRequest`.
/// Tag names are the wire values and must not be renamed.
const ErrorReason = enum {
Failed,
Aborted,
TimedOut,
AccessDenied,
ConnectionClosed,
ConnectionReset,
ConnectionRefused,
ConnectionAborted,
ConnectionFailed,
NameNotResolved,
InternetDisconnected,
AddressUnreachable,
BlockedByClient,
BlockedByResponse,
};

/// Handles `Fetch.disable`: stops request interception for the command's
/// browser context and replies with an empty result.
///
/// Returns `error.BrowserContextNotLoaded` when the command has no browser
/// context.
fn disable(cmd: anytype) !void {
    if (cmd.browser_context) |bc| {
        bc.fetchDisable();
        return cmd.sendResult(null, .{});
    }
    return error.BrowserContextNotLoaded;
}

/// Handles `Fetch.enable`: starts request interception for the command's
/// browser context and replies with an empty result.
///
/// Missing params are treated as an all-defaults `EnableParam`. Request
/// patterns and auth handling are not implemented; supplying either only
/// produces a warning.
///
/// Returns `error.BrowserContextNotLoaded` when the command has no browser
/// context, and propagates errors from parsing params or registering.
fn enable(cmd: anytype) !void {
    const maybe_params = try cmd.params(EnableParam);
    const params = maybe_params orelse EnableParam{};

    // Warn first, so unsupported options are reported even when the browser
    // context check below fails.
    if (params.patterns.len != 0) {
        log.warn(.cdp, "Fetch.enable No patterns yet", .{});
    }
    if (params.handleAuthRequests) {
        log.warn(.cdp, "Fetch.enable No auth yet", .{});
    }

    if (cmd.browser_context) |bc| {
        try bc.fetchEnable();
        return cmd.sendResult(null, .{});
    }
    return error.BrowserContextNotLoaded;
}

/// Handles an `.http_request_intercept` notification: parks the request and
/// emits a `Fetch.requestPaused` event to the CDP client.
///
/// A copy of `intercept.request` is stored in `cdp.intercept_state.waiting`
/// under its id, so that `continueRequest` / `failRequest` can find it later.
/// Only after the event has been sent is `intercept.wait_for_interception.*`
/// set to true.
///
/// `arena` is used for the temporary strings and header map that make up the
/// event payload.
///
/// On any error the parked copy is removed again. Without that, a failure
/// after parking would leave a stale entry in the waiting map while
/// `wait_for_interception` was never set.
pub fn requestPaused(arena: Allocator, bc: anytype, intercept: *const Notification.RequestIntercept) !void {
    const cdp = bc.cdp;

    // unreachable because we _have_ to have a page.
    const session_id = bc.session_id orelse unreachable;
    const target_id = bc.target_id orelse unreachable;
    const request_id = intercept.request.id.?;

    // Do all fallible payload preparation before touching the waiting map.
    // NOTE: .request data preparation is duped from network.zig
    const network = @import("network.zig");
    const full_request_url = try std.Uri.parse(intercept.request.url);
    const request_url = try network.urlToString(arena, &full_request_url, .{
        .scheme = true,
        .authentication = true,
        .authority = true,
        .path = true,
        .query = true,
    });
    const request_fragment = try network.urlToString(arena, &full_request_url, .{
        .fragment = true,
    });
    const headers = try intercept.request.headers.asHashMap(arena);
    // End of duped code

    const intercept_id = try std.fmt.allocPrint(arena, "INTERCEPT-{d}", .{request_id});
    const network_id = try std.fmt.allocPrint(arena, "REQ-{d}", .{request_id});

    // We keep it around to wait for modifications to the request.
    // NOTE: we assume whomever created the request created it with a lifetime of the Page.
    // TODO: What to do when receiving replies for a previous page's requests?
    try cdp.intercept_state.waiting.put(request_id, intercept.request.*);
    // If the client never hears about the pause, nobody will ever continue or
    // fail this request, so do not leave it parked.
    errdefer _ = cdp.intercept_state.waiting.swapRemove(request_id);

    try cdp.sendEvent("Fetch.requestPaused", .{
        .requestId = intercept_id,
        .request = .{
            .url = request_url,
            .urlFragment = request_fragment,
            .method = @tagName(intercept.request.method),
            .hasPostData = intercept.request.body != null,
            .headers = std.json.ArrayHashMap([]const u8){ .map = headers },
        },
        .frameId = target_id,
        .resourceType = ResourceType.Document, // TODO!
        .networkId = network_id,
    }, .{ .session_id = session_id });

    // Await either continueRequest, failRequest or fulfillRequest
    intercept.wait_for_interception.* = true;
}

/// Element type of the `headers` param accepted by `continueRequest`.
/// NOTE: supplying `headers` currently makes `continueRequest` return
/// `error.NotYetImplementedParams`, so these values are parsed but not used.
const HeaderEntry = struct {
name: []const u8,
value: []const u8,
};

/// Handles `Fetch.continueRequest`: re-issues a request previously parked by
/// `requestPaused`, optionally overriding its url and/or method, then replies
/// with an empty result.
///
/// Errors:
/// - `error.BrowserContextNotLoaded`: the command has no browser context.
/// - `error.InvalidParams`: params are missing, `requestId` is malformed, or
///   `method` is not a tag of `Method`.
/// - `error.NotYetImplementedParams`: `postData`, `headers` or
///   `interceptResponse` were supplied.
/// - `error.RequestNotFound`: no request is waiting under that id.
///
/// All overrides are validated and allocated before the request is taken out
/// of the waiting map, so a rejected command leaves the request parked and
/// the client can retry with corrected params.
fn continueRequest(cmd: anytype) !void {
    const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
    const params = (try cmd.params(struct {
        requestId: []const u8, // "INTERCEPT-{d}"
        url: ?[]const u8 = null,
        method: ?[]const u8 = null,
        postData: ?[]const u8 = null,
        headers: ?[]const HeaderEntry = null,
        interceptResponse: bool = false,
    })) orelse return error.InvalidParams;
    if (params.postData != null or params.headers != null or params.interceptResponse) return error.NotYetImplementedParams;

    const request_id = try idFromRequestId(params.requestId);
    const waiting = &bc.cdp.intercept_state.waiting;
    // Report an unknown id before looking at the overrides, matching the
    // error precedence callers already see.
    if (!waiting.contains(request_id)) return error.RequestNotFound;

    var method_override: ?Method = null;
    if (params.method) |method| {
        method_override = std.meta.stringToEnum(Method, method) orelse return error.InvalidParams;
    }

    // The request url must be modified in a way that's not observable by page. So page.url is not updated.
    const url_override = if (params.url) |url|
        try bc.cdp.browser.page_arena.allocator().dupeZ(u8, url)
    else
        null;

    // Nothing below can reject the command for bad input any more; it is now
    // safe to take ownership of the parked request.
    var waiting_request = (waiting.fetchSwapRemove(request_id) orelse return error.RequestNotFound).value;
    if (url_override) |url| waiting_request.url = url;
    if (method_override) |method| waiting_request.method = method;

    log.info(.cdp, "Request continued by intercept", .{ .id = params.requestId });
    try bc.cdp.browser.http_client.request(waiting_request);

    return cmd.sendResult(null, .{});
}

/// Handles `Fetch.failRequest`: drops a request previously parked by
/// `requestPaused` and replies with an empty result.
///
/// Errors:
/// - `error.BrowserContextNotLoaded`: the command has no browser context.
/// - `error.InvalidParams`: params are missing or `requestId` is malformed.
/// - `error.RequestNotFound`: no request is waiting under that id.
///
/// NOTE(review): the removed request is simply discarded and `errorReason` is
/// only logged. Nothing visible here informs the original requester of the
/// failure; confirm whether that is handled elsewhere.
fn failRequest(cmd: anytype) !void {
    const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;

    const FailParams = struct {
        requestId: []const u8, // "INTERCEPT-{d}"
        errorReason: ErrorReason,
    };
    const params = (try cmd.params(FailParams)) orelse return error.InvalidParams;

    const request_id = try idFromRequestId(params.requestId);
    const removed = bc.cdp.intercept_state.waiting.fetchSwapRemove(request_id);
    if (removed == null) {
        return error.RequestNotFound;
    }

    log.info(.cdp, "Request aborted by intercept", .{ .reason = params.errorReason });
    return cmd.sendResult(null, .{});
}

/// Extracts the numeric id from a CDP request id formatted as "INTERCEPT-{d}".
///
/// Returns `error.InvalidParams` when the prefix is missing, when nothing
/// follows the prefix, when the remainder contains anything other than ASCII
/// decimal digits, or when the value does not fit in a `u64`.
fn idFromRequestId(request_id: []const u8) !u64 {
    const prefix = "INTERCEPT-";
    if (!std.mem.startsWith(u8, request_id, prefix)) return error.InvalidParams;

    const digits = request_id[prefix.len..];
    // `std.fmt.parseInt` also accepts a leading sign and `_` digit separators,
    // neither of which `{d}` produces for an unsigned id. Reject them up front
    // so only ids in the form we emit are accepted.
    for (digits) |c| {
        if (!std.ascii.isDigit(c)) return error.InvalidParams;
    }
    return std.fmt.parseInt(u64, digits, 10) catch return error.InvalidParams;
}
Loading