From 03694b54f07b54ea98c8b8f5e2677a2edfb75ba9 Mon Sep 17 00:00:00 2001 From: sjorsdonkers <72333389+sjorsdonkers@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:49:20 +0200 Subject: [PATCH 1/3] 3# This is a combination of 3 commits. intercept continue and abort feedback First version of headers, no cookies yet --- src/browser/ScriptManager.zig | 3 +- src/browser/browser.zig | 3 + src/browser/page.zig | 5 +- src/browser/xhr/xhr.zig | 11 +- src/cdp/cdp.zig | 21 +++- src/cdp/domains/fetch.zig | 202 +++++++++++++++++++++++++++++++++- src/cdp/domains/network.zig | 25 ++--- src/http/Client.zig | 70 +++++++----- src/http/Http.zig | 67 +++++++++-- src/notification.zig | 14 ++- 10 files changed, 357 insertions(+), 64 deletions(-) diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index e94320ba3..4d6c3e62d 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -57,7 +57,6 @@ deferreds: OrderList, shutdown: bool = false, - client: *HttpClient, allocator: Allocator, buffer_pool: BufferPool, @@ -234,6 +233,7 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void { .url = remote_url.?, .ctx = pending_script, .method = .GET, + .headers = try HttpClient.Headers.init(), .cookie = page.requestCookie(.{}), .start_callback = if (log.enabled(.http, .debug)) startCallback else null, .header_done_callback = headerCallback, @@ -297,6 +297,7 @@ pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult { try client.blockingRequest(.{ .url = url, .method = .GET, + .headers = try HttpClient.Headers.init(), .ctx = &blocking, .cookie = self.page.requestCookie(.{}), .start_callback = if (log.enabled(.http, .debug)) Blocking.startCallback else null, diff --git a/src/browser/browser.zig b/src/browser/browser.zig index f836ef2d6..1b5636e98 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -52,6 +52,8 @@ pub const Browser = struct { errdefer env.deinit(); const notification = try 
Notification.init(allocator, app.notification); + app.http.client.notification = notification; + app.http.client.next_request_id = 0; // Should we track ids in CDP only? errdefer notification.deinit(); return .{ @@ -74,6 +76,7 @@ pub const Browser = struct { self.page_arena.deinit(); self.session_arena.deinit(); self.transfer_arena.deinit(); + self.http_client.notification = null; self.notification.deinit(); self.state_pool.deinit(); } diff --git a/src/browser/page.zig b/src/browser/page.zig index cb181f5b7..b2c89de26 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -467,12 +467,15 @@ pub const Page = struct { const owned_url = try self.arena.dupeZ(u8, request_url); self.url = try URL.parse(owned_url, null); + var headers = try HttpClient.Headers.init(); + if (opts.header) |hdr| try headers.add(hdr); + self.http_client.request(.{ .ctx = self, .url = owned_url, .method = opts.method, + .headers = headers, .body = opts.body, - .header = opts.header, .cookie = self.requestCookie(.{ .is_navigation = true }), .header_done_callback = pageHeaderDoneCallback, .data_callback = pageDataCallback, diff --git a/src/browser/xhr/xhr.zig b/src/browser/xhr/xhr.zig index 51393b61f..f22796e50 100644 --- a/src/browser/xhr/xhr.zig +++ b/src/browser/xhr/xhr.zig @@ -370,10 +370,16 @@ pub const XMLHttpRequest = struct { } } + var headers = try HttpClient.Headers.init(); + for (self.headers.items) |hdr| { + try headers.add(hdr); + } + try page.http_client.request(.{ .ctx = self, .url = self.url.?, .method = self.method, + .headers = headers, .body = self.request_body, .cookie = page.requestCookie(.{}), .start_callback = httpStartCallback, @@ -387,11 +393,6 @@ pub const XMLHttpRequest = struct { fn httpStartCallback(transfer: *HttpClient.Transfer) !void { const self: *XMLHttpRequest = @alignCast(@ptrCast(transfer.ctx)); - - for (self.headers.items) |hdr| { - try transfer.addHeader(hdr); - } - log.debug(.http, "request start", .{ .method = self.method, .url = self.url, .source 
= "xhr" }); self.transfer = transfer; } diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig index d859f9252..696448c20 100644 --- a/src/cdp/cdp.zig +++ b/src/cdp/cdp.zig @@ -29,6 +29,7 @@ const Page = @import("../browser/page.zig").Page; const Inspector = @import("../browser/env.zig").Env.Inspector; const Incrementing = @import("../id.zig").Incrementing; const Notification = @import("../notification.zig").Notification; +const InterceptState = @import("domains/fetch.zig").InterceptState; const polyfill = @import("../browser/polyfill/polyfill.zig"); @@ -75,6 +76,8 @@ pub fn CDPT(comptime TypeProvider: type) type { // Extra headers to add to all requests. TBD under which conditions this should be reset. extra_headers: std.ArrayListUnmanaged(std.http.Header) = .empty, + intercept_state: InterceptState, + const Self = @This(); pub fn init(app: *App, client: TypeProvider.Client) !Self { @@ -89,6 +92,7 @@ pub fn CDPT(comptime TypeProvider: type) type { .browser_context = null, .message_arena = std.heap.ArenaAllocator.init(allocator), .notification_arena = std.heap.ArenaAllocator.init(allocator), + .intercept_state = try InterceptState.init(allocator), // TBD or browser session arena? }; } @@ -96,6 +100,7 @@ pub fn CDPT(comptime TypeProvider: type) type { if (self.browser_context) |*bc| { bc.deinit(); } + self.intercept_state.deinit(); // TBD Should this live in BC? 
self.browser.deinit(); self.message_arena.deinit(); self.notification_arena.deinit(); @@ -451,6 +456,14 @@ pub fn BrowserContext(comptime CDP_T: type) type { self.cdp.browser.notification.unregister(.http_request_complete, self); } + pub fn fetchEnable(self: *Self) !void { + try self.cdp.browser.notification.register(.http_request_intercept, self, onHttpRequestIntercept); + } + + pub fn fetchDisable(self: *Self) void { + self.cdp.browser.notification.unregister(.http_request_intercept, self); + } + pub fn onPageRemove(ctx: *anyopaque, _: Notification.PageRemove) !void { const self: *Self = @alignCast(@ptrCast(ctx)); return @import("domains/page.zig").pageRemove(self); @@ -475,7 +488,13 @@ pub fn BrowserContext(comptime CDP_T: type) type { pub fn onHttpRequestStart(ctx: *anyopaque, data: *const Notification.RequestStart) !void { const self: *Self = @alignCast(@ptrCast(ctx)); defer self.resetNotificationArena(); - return @import("domains/network.zig").httpRequestStart(self.notification_arena, self, data); + try @import("domains/network.zig").httpRequestStart(self.notification_arena, self, data); + } + + pub fn onHttpRequestIntercept(ctx: *anyopaque, data: *const Notification.RequestIntercept) !void { + const self: *Self = @alignCast(@ptrCast(ctx)); + defer self.resetNotificationArena(); + try @import("domains/fetch.zig").requestPaused(self.notification_arena, self, data); } pub fn onHttpRequestFail(ctx: *anyopaque, data: *const Notification.RequestFail) !void { diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index ea87cc929..708e3d9aa 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Lightpanda (Selecy SAS) +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) // // Francis Bouvier // Pierre Tachoire @@ -17,13 +17,211 @@ // along with this program. If not, see . 
const std = @import("std"); +const Allocator = std.mem.Allocator; +const Notification = @import("../../notification.zig").Notification; +const log = @import("../../log.zig"); +const Request = @import("../../http/Client.zig").Request; +const Method = @import("../../http/Client.zig").Method; pub fn processMessage(cmd: anytype) !void { const action = std.meta.stringToEnum(enum { disable, + enable, + continueRequest, + failRequest, }, cmd.input.action) orelse return error.UnknownMethod; switch (action) { - .disable => return cmd.sendResult(null, .{}), + .disable => return disable(cmd), + .enable => return enable(cmd), + .continueRequest => return continueRequest(cmd), + .failRequest => return failRequest(cmd), } } + +// Stored in CDP +pub const InterceptState = struct { + const Self = @This(); + waiting: std.AutoArrayHashMap(u64, Request), + + pub fn init(allocator: Allocator) !InterceptState { + return .{ + .waiting = std.AutoArrayHashMap(u64, Request).init(allocator), + }; + } + + pub fn deinit(self: *Self) void { + self.waiting.deinit(); + } +}; + +const RequestPattern = struct { + urlPattern: []const u8 = "*", // Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*". 
+ resourceType: ?ResourceType = null, + requestStage: RequestStage = .Request, +}; +const ResourceType = enum { + Document, + Stylesheet, + Image, + Media, + Font, + Script, + TextTrack, + XHR, + Fetch, + Prefetch, + EventSource, + WebSocket, + Manifest, + SignedExchange, + Ping, + CSPViolationReport, + Preflight, + FedCM, + Other, +}; +const RequestStage = enum { + Request, + Response, +}; + +const EnableParam = struct { + patterns: []RequestPattern = &.{}, + handleAuthRequests: bool = false, +}; +const ErrorReason = enum { + Failed, + Aborted, + TimedOut, + AccessDenied, + ConnectionClosed, + ConnectionReset, + ConnectionRefused, + ConnectionAborted, + ConnectionFailed, + NameNotResolved, + InternetDisconnected, + AddressUnreachable, + BlockedByClient, + BlockedByResponse, +}; + +fn disable(cmd: anytype) !void { + const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded; + bc.fetchDisable(); + return cmd.sendResult(null, .{}); +} + +fn enable(cmd: anytype) !void { + const params = (try cmd.params(EnableParam)) orelse EnableParam{}; + if (params.patterns.len != 0) log.warn(.cdp, "Fetch.enable No patterns yet", .{}); + if (params.handleAuthRequests) log.warn(.cdp, "Fetch.enable No auth yet", .{}); + + const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded; + try bc.fetchEnable(); + + return cmd.sendResult(null, .{}); +} + +pub fn requestPaused(arena: Allocator, bc: anytype, intercept: *const Notification.RequestIntercept) !void { + var cdp = bc.cdp; + + // unreachable because we _have_ to have a page. + const session_id = bc.session_id orelse unreachable; + const target_id = bc.target_id orelse unreachable; + + // We keep it around to wait for modifications to the request. + // NOTE: we assume whomever created the request created it with a lifetime of the Page. + // TODO: What to do when receiving replies for a previous page's requests? 
+ + try cdp.intercept_state.waiting.put(intercept.request.id.?, intercept.request.*); + + // NOTE: .request data preparation is duped from network.zig + const full_request_url = try std.Uri.parse(intercept.request.url); + const request_url = try @import("network.zig").urlToString(arena, &full_request_url, .{ + .scheme = true, + .authentication = true, + .authority = true, + .path = true, + .query = true, + }); + const request_fragment = try @import("network.zig").urlToString(arena, &full_request_url, .{ + .fragment = true, + }); + const headers = try intercept.request.headers.asHashMap(arena); + // End of duped code + + try cdp.sendEvent("Fetch.requestPaused", .{ + .requestId = try std.fmt.allocPrint(arena, "INTERCEPT-{d}", .{intercept.request.id.?}), + .request = .{ + .url = request_url, + .urlFragment = request_fragment, + .method = @tagName(intercept.request.method), + .hasPostData = intercept.request.body != null, + .headers = std.json.ArrayHashMap([]const u8){ .map = headers }, + }, + .frameId = target_id, + .resourceType = ResourceType.Document, // TODO! 
+ .networkId = try std.fmt.allocPrint(arena, "REQ-{d}", .{intercept.request.id.?}), + }, .{ .session_id = session_id }); + + // Await either continueRequest, failRequest or fulfillRequest + intercept.wait_for_interception.* = true; +} + +const HeaderEntry = struct { + name: []const u8, + value: []const u8, +}; + +fn continueRequest(cmd: anytype) !void { + const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded; + const params = (try cmd.params(struct { + requestId: []const u8, // "INTERCEPT-{d}" + url: ?[]const u8 = null, + method: ?[]const u8 = null, + postData: ?[]const u8 = null, + headers: ?[]const HeaderEntry = null, + interceptResponse: bool = false, + })) orelse return error.InvalidParams; + if (params.postData != null or params.headers != null or params.interceptResponse) return error.NotYetImplementedParams; + + const request_id = try idFromRequestId(params.requestId); + var waiting_request = (bc.cdp.intercept_state.waiting.fetchSwapRemove(request_id) orelse return error.RequestNotFound).value; + + // Update the request with the new parameters + if (params.url) |url| { + // The request url must be modified in a way that's not observable by page. So page.url is not updated. 
+ waiting_request.url = try bc.cdp.browser.page_arena.allocator().dupeZ(u8, url); + } + if (params.method) |method| { + waiting_request.method = std.meta.stringToEnum(Method, method) orelse return error.InvalidParams; + } + + log.info(.cdp, "Request continued by intercept", .{ .id = params.requestId }); + try bc.cdp.browser.http_client.request(waiting_request); + + return cmd.sendResult(null, .{}); +} + +fn failRequest(cmd: anytype) !void { + const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded; + var state = &bc.cdp.intercept_state; + const params = (try cmd.params(struct { + requestId: []const u8, // "INTERCEPT-{d}" + errorReason: ErrorReason, + })) orelse return error.InvalidParams; + + const request_id = try idFromRequestId(params.requestId); + if (state.waiting.fetchSwapRemove(request_id) == null) return error.RequestNotFound; + + log.info(.cdp, "Request aborted by intercept", .{ .reason = params.errorReason }); + return cmd.sendResult(null, .{}); +} + +// Get u64 from requestId which is formatted as: "INTERCEPT-{d}" +fn idFromRequestId(request_id: []const u8) !u64 { + if (!std.mem.startsWith(u8, request_id, "INTERCEPT-")) return error.InvalidParams; + return std.fmt.parseInt(u64, request_id[10..], 10) catch return error.InvalidParams; +} diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 44701faef..164cc745e 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -223,7 +223,7 @@ pub fn httpRequestFail(arena: Allocator, bc: anytype, request: *const Notificati }, .{ .session_id = session_id }); } -pub fn httpRequestStart(arena: Allocator, bc: anytype, request: *const Notification.RequestStart) !void { +pub fn httpRequestStart(arena: Allocator, bc: anytype, data: *const Notification.RequestStart) !void { // Isn't possible to do a network request within a Browser (which our // notification is tied to), without a page. 
std.debug.assert(bc.session.page != null); @@ -251,36 +251,31 @@ pub fn httpRequestStart(arena: Allocator, bc: anytype, request: *const Notificat .query = true, }); - const request_url = try urlToString(arena, request.url, .{ + const full_request_url = try std.Uri.parse(data.request.url); + const request_url = try urlToString(arena, &full_request_url, .{ .scheme = true, .authentication = true, .authority = true, .path = true, .query = true, }); - - const request_fragment = try urlToString(arena, request.url, .{ - .fragment = true, + const request_fragment = try urlToString(arena, &full_request_url, .{ + .fragment = true, // TODO since path is false, this likely does not work as intended }); - // @newhttp - const headers: std.StringArrayHashMapUnmanaged([]const u8) = .empty; - // try headers.ensureTotalCapacity(arena, request.headers.items.len); - // for (request.headers.items) |header| { - // headers.putAssumeCapacity(header.name, header.value); - // } + const headers = try data.request.headers.asHashMap(arena); // We're missing a bunch of fields, but, for now, this seems like enough try cdp.sendEvent("Network.requestWillBeSent", .{ - .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{request.id}), + .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{data.request.id.?}), .frameId = target_id, .loaderId = bc.loader_id, .documentUrl = document_url, .request = .{ .url = request_url, .urlFragment = request_fragment, - .method = @tagName(request.method), - .hasPostData = request.has_body, + .method = @tagName(data.request.method), + .hasPostData = data.request.body != null, .headers = std.json.ArrayHashMap([]const u8){ .map = headers }, }, }, .{ .session_id = session_id }); @@ -326,7 +321,7 @@ pub fn httpRequestComplete(arena: Allocator, bc: anytype, request: *const Notifi }, .{ .session_id = session_id }); } -fn urlToString(arena: Allocator, url: *const std.Uri, opts: std.Uri.WriteToStreamOptions) ![]const u8 { +pub fn urlToString(arena: Allocator, url: 
*const std.Uri, opts: std.Uri.WriteToStreamOptions) ![]const u8 { var buf: std.ArrayListUnmanaged(u8) = .empty; try url.writeToStream(opts, buf.writer(arena)); return buf.items; diff --git a/src/http/Client.zig b/src/http/Client.zig index f2a26dfcf..56673d228 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -20,6 +20,8 @@ const std = @import("std"); const log = @import("../log.zig"); const builtin = @import("builtin"); const Http = @import("Http.zig"); +pub const Headers = Http.Headers; +const Notification = @import("../notification.zig").Notification; const c = Http.c; @@ -57,6 +59,9 @@ multi: *c.CURLM, // of easys. handles: Handles, +// Use to generate the next request ID +next_request_id: u64 = 0, + // When handles has no more available easys, requests get queued. queue: RequestQueue, @@ -74,6 +79,9 @@ transfer_pool: std.heap.MemoryPool(Transfer), // see ScriptManager.blockingGet blocking: Handle, +// To notify registered subscribers of events, the browser sets/nulls this for us. +notification: ?*Notification = null, + // The only place this is meant to be used is in `makeRequest` BEFORE `perform` // is called. It is used to generate our Cookie header. 
It can be used for other // purposes, but keep in mind that, while single-threaded, calls like makeRequest @@ -184,12 +192,26 @@ pub fn tick(self: *Client, timeout_ms: usize) !void { } pub fn request(self: *Client, req: Request) !void { + var req_copy = req; // We need it mutable + + if (req_copy.id == null) { // If the ID has already been set that means the request was previously intercepted + req_copy.id = self.next_request_id; + self.next_request_id += 1; + if (self.notification) |notification| { + notification.dispatch(.http_request_start, &.{ .request = &req_copy }); + + var wait_for_interception = false; + notification.dispatch(.http_request_intercept, &.{ .request = &req_copy, .wait_for_interception = &wait_for_interception }); + if (wait_for_interception) return; // The user is sent an invitation to intercept this request. + } + } + if (self.handles.getFreeHandle()) |handle| { - return self.makeRequest(handle, req); + return self.makeRequest(handle, req_copy); } const node = try self.queue_node_pool.create(); - node.data = req; + node.data = req_copy; self.queue.append(node); } @@ -239,7 +261,8 @@ fn makeRequest(self: *Client, handle: *Handle, req: Request) !void { return; }; - const header_list = blk: { + var header_list = req.headers; + { errdefer self.handles.release(handle); try conn.setMethod(req.method); try conn.setURL(req.url); @@ -248,31 +271,23 @@ fn makeRequest(self: *Client, handle: *Handle, req: Request) !void { try conn.setBody(b); } - var header_list = conn.commonHeaders(); - errdefer c.curl_slist_free_all(header_list); + // { // TODO move up to `fn request()` + // const aa = self.arena.allocator(); + // var arr: std.ArrayListUnmanaged(u8) = .{}; + // try req.cookie.forRequest(&uri, arr.writer(aa)); - if (req.header) |hdr| { - header_list = c.curl_slist_append(header_list, hdr); - } + // if (arr.items.len > 0) { + // try arr.append(aa, 0); //null terminate - { - const aa = self.arena.allocator(); - var arr: std.ArrayListUnmanaged(u8) = .{}; - 
try req.cookie.forRequest(&uri, arr.writer(aa)); - - if (arr.items.len > 0) { - try arr.append(aa, 0); //null terminate - - // copies the value - header_list = c.curl_slist_append(header_list, @ptrCast(arr.items.ptr)); - defer _ = self.arena.reset(.{ .retain_with_limit = 2048 }); - } - } - - try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HTTPHEADER, header_list)); + // // copies the value + // header_list = c.curl_slist_append(header_list, @ptrCast(arr.items.ptr)); + // defer _ = self.arena.reset(.{ .retain_with_limit = 2048 }); + // } + // } - break :blk header_list; - }; + try conn.secretHeaders(&header_list); // Add headers that must be hidden from intercepts + try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HTTPHEADER, header_list.headers)); + } { errdefer self.handles.release(handle); @@ -284,7 +299,7 @@ fn makeRequest(self: *Client, handle: *Handle, req: Request) !void { .req = req, .ctx = req.ctx, .handle = handle, - ._request_header_list = header_list, + ._request_header_list = header_list.headers, }; errdefer self.transfer_pool.destroy(transfer); @@ -471,10 +486,11 @@ pub const RequestCookie = struct { }; pub const Request = struct { + id: ?u64 = null, method: Method, url: [:0]const u8, + headers: Headers, body: ?[]const u8 = null, - header: ?[:0]const u8 = null, cookie: RequestCookie, // arbitrary data that can be associated with this request diff --git a/src/http/Http.zig b/src/http/Http.zig index 521765686..b45965241 100644 --- a/src/http/Http.zig +++ b/src/http/Http.zig @@ -185,20 +185,20 @@ pub const Connection = struct { try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_POSTFIELDS, body.ptr)); } - pub fn commonHeaders(self: *const Connection) *c.curl_slist { - var header_list = c.curl_slist_append(null, "User-Agent: Lightpanda/1.0"); + // These are headers that may not be sent to the users for interception. 
+ pub fn secretHeaders(self: *const Connection, headers: *Headers) !void { if (self.opts.proxy_bearer_token) |hdr| { - header_list = c.curl_slist_append(header_list, hdr); + try headers.add(hdr); } - return header_list; } pub fn request(self: *const Connection) !u16 { const easy = self.easy; - const header_list = self.commonHeaders(); - defer c.curl_slist_free_all(header_list); - try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HTTPHEADER, header_list)); + var header_list = try Headers.init(); // must be `var`: deinit() and secretHeaders() both take *Headers + defer header_list.deinit(); + try self.secretHeaders(&header_list); + try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HTTPHEADER, header_list.headers)); try errorCheck(c.curl_easy_perform(easy)); var http_code: c_long = undefined; @@ -210,6 +210,59 @@ pub const Connection = struct { } }; +pub const Headers = struct { + headers: *c.curl_slist, + + pub fn init() !Headers { + const header_list = c.curl_slist_append(null, "User-Agent: Lightpanda/1.0"); + if (header_list == null) return error.OutOfMemory; + return .{ .headers = header_list }; + } + + pub fn deinit(self: *Headers) void { + c.curl_slist_free_all(self.headers); + } + + pub fn add(self: *Headers, header: [*c]const u8) !void { + const updated_headers = c.curl_slist_append(self.headers, header); + if (updated_headers == null) return error.OutOfMemory; + self.headers = updated_headers; + } + + pub fn asHashMap(self: *const Headers, allocator: Allocator) !std.StringArrayHashMapUnmanaged([]const u8) { + var list: std.StringArrayHashMapUnmanaged([]const u8) = .empty; + try list.ensureTotalCapacity(allocator, self.count()); + + var current: [*c]c.curl_slist = self.headers; + while (current) |node| { + const str = std.mem.span(@as([*:0]const u8, @ptrCast(node.*.data))); + const header = parseHeader(str) orelse return error.InvalidHeader; + list.putAssumeCapacity(header.name, header.value); + current = node.*.next; + } + return list; + } + + fn parseHeader(header_str: []const u8) ?struct { name: []const u8, value: 
[]const u8 } { + const colon_pos = std.mem.indexOf(u8, header_str, ":") orelse return null; + + const name = std.mem.trim(u8, header_str[0..colon_pos], " \t"); + const value = std.mem.trim(u8, header_str[colon_pos + 1 ..], " \t"); + + return .{ .name = name, .value = value }; + } + + pub fn count(self: *const Headers) usize { + var current: [*c]c.curl_slist = self.headers; + var num: usize = 0; + while (current) |node| { + num += 1; + current = node.*.next; + } + return num; + } +}; + pub fn errorCheck(code: c.CURLcode) errors.Error!void { if (code == c.CURLE_OK) { return; diff --git a/src/notification.zig b/src/notification.zig index 7caed0883..e351e9b31 100644 --- a/src/notification.zig +++ b/src/notification.zig @@ -4,6 +4,7 @@ const log = @import("log.zig"); const URL = @import("url.zig").URL; const page = @import("browser/page.zig"); const Http = @import("http/Http.zig"); +const Request = @import("http/Client.zig").Request; const Allocator = std.mem.Allocator; @@ -61,6 +62,7 @@ pub const Notification = struct { page_navigated: List = .{}, http_request_fail: List = .{}, http_request_start: List = .{}, + http_request_intercept: List = .{}, http_request_complete: List = .{}, notification_created: List = .{}, }; @@ -72,6 +74,7 @@ pub const Notification = struct { page_navigated: *const PageNavigated, http_request_fail: *const RequestFail, http_request_start: *const RequestStart, + http_request_intercept: *const RequestIntercept, http_request_complete: *const RequestComplete, notification_created: *Notification, }; @@ -91,11 +94,12 @@ pub const Notification = struct { }; pub const RequestStart = struct { - arena: Allocator, - id: usize, - url: *const std.Uri, - method: Http.Method, - has_body: bool, + request: *Request, + }; + + pub const RequestIntercept = struct { + request: *Request, + wait_for_interception: *bool, }; pub const RequestFail = struct { From 77eee7f087b69854409869bb4457ec6398ab247b Mon Sep 17 00:00:00 2001 From: sjorsdonkers 
<72333389+sjorsdonkers@users.noreply.github.com> Date: Tue, 12 Aug 2025 14:40:23 +0200 Subject: [PATCH 2/3] Cookies --- src/browser/ScriptManager.zig | 14 ++++++++++---- src/browser/page.zig | 3 ++- src/browser/xhr/xhr.zig | 3 ++- src/http/Client.zig | 34 ++++++++++++++++------------------ src/http/Http.zig | 1 + 5 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 4d6c3e62d..3d5b86bdf 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -229,12 +229,15 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void { errdefer pending_script.deinit(); + var headers = try HttpClient.Headers.init(); + try page.requestCookie(.{}).headersForRequest(self.allocator, remote_url.?, &headers); + try self.client.request(.{ .url = remote_url.?, .ctx = pending_script, .method = .GET, - .headers = try HttpClient.Headers.init(), - .cookie = page.requestCookie(.{}), + .headers = headers, + .cookie_jar = page.cookie_jar, .start_callback = if (log.enabled(.http, .debug)) startCallback else null, .header_done_callback = headerCallback, .data_callback = dataCallback, @@ -293,13 +296,16 @@ pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult { .buffer_pool = &self.buffer_pool, }; + var headers = try HttpClient.Headers.init(); + try self.page.requestCookie(.{}).headersForRequest(self.allocator, url, &headers); + var client = self.client; try client.blockingRequest(.{ .url = url, .method = .GET, - .headers = try HttpClient.Headers.init(), + .headers = headers, + .cookie_jar = self.page.cookie_jar, .ctx = &blocking, - .cookie = self.page.requestCookie(.{}), .start_callback = if (log.enabled(.http, .debug)) Blocking.startCallback else null, .header_done_callback = Blocking.headerCallback, .data_callback = Blocking.dataCallback, diff --git a/src/browser/page.zig b/src/browser/page.zig index b2c89de26..cfe261f9b 100644 --- a/src/browser/page.zig 
+++ b/src/browser/page.zig @@ -469,6 +469,7 @@ pub const Page = struct { var headers = try HttpClient.Headers.init(); if (opts.header) |hdr| try headers.add(hdr); + try self.requestCookie(.{ .is_navigation = true }).headersForRequest(self.arena, owned_url, &headers); self.http_client.request(.{ .ctx = self, @@ -476,7 +477,7 @@ pub const Page = struct { .method = opts.method, .headers = headers, .body = opts.body, - .cookie = self.requestCookie(.{ .is_navigation = true }), + .cookie_jar = self.cookie_jar, .header_done_callback = pageHeaderDoneCallback, .data_callback = pageDataCallback, .done_callback = pageDoneCallback, diff --git a/src/browser/xhr/xhr.zig b/src/browser/xhr/xhr.zig index f22796e50..720b813b6 100644 --- a/src/browser/xhr/xhr.zig +++ b/src/browser/xhr/xhr.zig @@ -374,6 +374,7 @@ pub const XMLHttpRequest = struct { for (self.headers.items) |hdr| { try headers.add(hdr); } + try page.requestCookie(.{}).headersForRequest(self.arena, self.url.?, &headers); try page.http_client.request(.{ .ctx = self, @@ -381,7 +382,7 @@ pub const XMLHttpRequest = struct { .method = self.method, .headers = headers, .body = self.request_body, - .cookie = page.requestCookie(.{}), + .cookie_jar = page.cookie_jar, .start_callback = httpStartCallback, .header_callback = httpHeaderCallback, .header_done_callback = httpHeaderDoneCallback, diff --git a/src/http/Client.zig b/src/http/Client.zig index 56673d228..417677722 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -22,6 +22,7 @@ const builtin = @import("builtin"); const Http = @import("Http.zig"); pub const Headers = Http.Headers; const Notification = @import("../notification.zig").Notification; +const storage = @import("../browser/storage/storage.zig"); const c = Http.c; @@ -271,20 +272,6 @@ fn makeRequest(self: *Client, handle: *Handle, req: Request) !void { try conn.setBody(b); } - // { // TODO move up to `fn request()` - // const aa = self.arena.allocator(); - // var arr: std.ArrayListUnmanaged(u8) = .{}; - // 
try req.cookie.forRequest(&uri, arr.writer(aa)); - - // if (arr.items.len > 0) { - // try arr.append(aa, 0); //null terminate - - // // copies the value - // header_list = c.curl_slist_append(header_list, @ptrCast(arr.items.ptr)); - // defer _ = self.arena.reset(.{ .retain_with_limit = 2048 }); - // } - // } - try conn.secretHeaders(&header_list); // Add headers that must be hidden from intercepts try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HTTPHEADER, header_list.headers)); } @@ -475,13 +462,24 @@ pub const RequestCookie = struct { origin: *const std.Uri, jar: *@import("../browser/storage/cookie.zig").Jar, - fn forRequest(self: *const RequestCookie, uri: *const std.Uri, writer: anytype) !void { - return self.jar.forRequest(uri, writer, .{ + pub fn headersForRequest(self: *const RequestCookie, temp: Allocator, url: [:0]const u8, headers: *Headers) !void { + const uri = std.Uri.parse(url) catch |err| { + log.warn(.http, "invalid url", .{ .err = err, .url = url }); + return error.InvalidUrl; + }; + + var arr: std.ArrayListUnmanaged(u8) = .{}; + try self.jar.forRequest(&uri, arr.writer(temp), .{ .is_http = self.is_http, .is_navigation = self.is_navigation, .origin_uri = self.origin, .prefix = "Cookie: ", }); + + if (arr.items.len > 0) { + try arr.append(temp, 0); //null terminate + try headers.add(@ptrCast(arr.items.ptr)); + } } }; @@ -491,7 +489,7 @@ pub const Request = struct { url: [:0]const u8, headers: Headers, body: ?[]const u8 = null, - cookie: RequestCookie, + cookie_jar: *storage.CookieJar, // arbitrary data that can be associated with this request ctx: *anyopaque = undefined, @@ -616,7 +614,7 @@ pub const Transfer = struct { if (header.len > SET_COOKIE_LEN) { if (std.ascii.eqlIgnoreCase(header[0..SET_COOKIE_LEN], "set-cookie:")) { const value = std.mem.trimLeft(u8, header[SET_COOKIE_LEN..], " "); - transfer.req.cookie.jar.populateFromResponse(&transfer.uri, value) catch |err| { + transfer.req.cookie_jar.populateFromResponse(&transfer.uri, value) catch 
|err| { log.err(.http, "set cookie", .{ .err = err, .req = transfer }); }; } diff --git a/src/http/Http.zig b/src/http/Http.zig index b45965241..aa5c59a28 100644 --- a/src/http/Http.zig +++ b/src/http/Http.zig @@ -224,6 +224,7 @@ pub const Headers = struct { } pub fn add(self: *Headers, header: [*c]const u8) !void { + // Copies the value const updated_headers = c.curl_slist_append(self.headers, header); if (updated_headers == null) return error.OutOfMemory; self.headers = updated_headers; From a49154acf4df3e57b4845abf7386c16522f2a6d3 Mon Sep 17 00:00:00 2001 From: sjorsdonkers <72333389+sjorsdonkers@users.noreply.github.com> Date: Tue, 12 Aug 2025 15:20:48 +0200 Subject: [PATCH 3/3] http_request_fail --- src/cdp/domains/network.zig | 6 +++--- src/http/Client.zig | 23 +++++++++++++++++++---- src/notification.zig | 5 ++--- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 164cc745e..f143ff522 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -203,7 +203,7 @@ fn putAssumeCapacity(headers: *std.ArrayListUnmanaged(std.http.Header), extra: s return true; } -pub fn httpRequestFail(arena: Allocator, bc: anytype, request: *const Notification.RequestFail) !void { +pub fn httpRequestFail(arena: Allocator, bc: anytype, data: *const Notification.RequestFail) !void { // It's possible that the request failed because we aborted when the client // sent Target.closeTarget. In that case, bc.session_id will be cleared // already, and we can skip sending these messages to the client. 
@@ -215,10 +215,10 @@ pub fn httpRequestFail(arena: Allocator, bc: anytype, request: *const Notificati // We're missing a bunch of fields, but, for now, this seems like enough try bc.cdp.sendEvent("Network.loadingFailed", .{ - .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{request.id}), + .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{data.request.id.?}), // Seems to be what chrome answers with. I assume it depends on the type of error? .type = "Ping", - .errorText = request.err, + .errorText = data.err, .canceled = false, }, .{ .session_id = session_id }); } diff --git a/src/http/Client.zig b/src/http/Client.zig index 417677722..c72da2ee4 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -151,7 +151,7 @@ pub fn abort(self: *Client) void { log.err(.http, "get private info", .{ .err = err, .source = "abort" }); continue; }; - transfer.req.error_callback(transfer.ctx, error.Abort); + self.requestFailed(&transfer.req, error.Abort); self.endTransfer(transfer); } std.debug.assert(self.active == 0); @@ -221,6 +221,20 @@ pub fn blockingRequest(self: *Client, req: Request) !void { return self.makeRequest(&self.blocking, req); } +fn requestFailed(self: *Client, req: *Request, err: anyerror) void { + if (req._notified_fail) return; + req._notified_fail = true; + + if (self.notification) |notification| { + notification.dispatch(.http_request_fail, &.{ + .request = req, + .err = err, + }); + } + + req.error_callback(req.ctx, err); +} + // Restrictive since it'll only work if there are no inflight requests. In some // cases, the libcurl documentation is clear that changing settings while a // connection is inflight is undefined. 
It doesn't say anything about CURLOPT_PROXY, @@ -326,7 +340,6 @@ fn perform(self: *Client, timeout_ms: c_int) !void { const transfer = try Transfer.fromEasy(easy); const ctx = transfer.ctx; const done_callback = transfer.req.done_callback; - const error_callback = transfer.req.error_callback; // release it ASAP so that it's available; some done_callbacks // will load more resources. @@ -336,10 +349,10 @@ fn perform(self: *Client, timeout_ms: c_int) !void { done_callback(ctx) catch |err| { // transfer isn't valid at this point, don't use it. log.err(.http, "done_callback", .{ .err = err }); - error_callback(ctx, err); + self.requestFailed(&transfer.req, err); }; } else |err| { - error_callback(ctx, err); + self.requestFailed(&transfer.req, err); } } } @@ -491,6 +504,8 @@ pub const Request = struct { body: ?[]const u8 = null, cookie_jar: *storage.CookieJar, + _notified_fail: bool = false, + // arbitrary data that can be associated with this request ctx: *anyopaque = undefined, diff --git a/src/notification.zig b/src/notification.zig index e351e9b31..6f2e29c90 100644 --- a/src/notification.zig +++ b/src/notification.zig @@ -103,9 +103,8 @@ pub const Notification = struct { }; pub const RequestFail = struct { - id: usize, - url: *const std.Uri, - err: []const u8, + request: *Request, + err: anyerror, }; pub const RequestComplete = struct {