From 34422414b33998aa6e6297748b6d30d6d80e1251 Mon Sep 17 00:00:00 2001 From: rcombs Date: Wed, 25 Dec 2024 13:46:34 -0800 Subject: [PATCH 1/4] sws_utils: re-add mp_image_swscale, with opts/log handling This reverts commit d9eb9ed844546fe857b921a46582314788ca34b4 --- video/sws_utils.c | 12 ++++++++++++ video/sws_utils.h | 3 +++ 2 files changed, 15 insertions(+) diff --git a/video/sws_utils.c b/video/sws_utils.c index a3aa57e5c23a0..a11d627237783 100644 --- a/video/sws_utils.c +++ b/video/sws_utils.c @@ -424,6 +424,18 @@ int mp_sws_scale(struct mp_sws_context *ctx, struct mp_image *dst, return 0; } +int mp_image_swscale(struct mp_image *dst, struct mp_image *src, + struct mpv_global *global, struct mp_log *log) +{ + struct mp_sws_context *ctx = mp_sws_alloc(NULL); + ctx->log = log; + if (global) + mp_sws_enable_cmdline_opts(ctx, global); + int res = mp_sws_scale(ctx, dst, src); + talloc_free(ctx); + return res; +} + int mp_image_sw_blur_scale(struct mp_image *dst, struct mp_image *src, float gblur) { diff --git a/video/sws_utils.h b/video/sws_utils.h index 8c0826592d0f8..41d44e7734027 100644 --- a/video/sws_utils.h +++ b/video/sws_utils.h @@ -17,6 +17,9 @@ extern const int mp_sws_fast_flags; bool mp_sws_supported_format(int imgfmt); +int mp_image_swscale(struct mp_image *dst, struct mp_image *src, + struct mpv_global *global, struct mp_log *log); + int mp_image_sw_blur_scale(struct mp_image *dst, struct mp_image *src, float gblur); From 2b633eef9f5cd6329eca9da1e275670714993e3f Mon Sep 17 00:00:00 2001 From: rcombs Date: Sun, 16 Mar 2025 23:19:01 +0900 Subject: [PATCH 2/4] [osdep] add image conversion utility for macOS --- meson.build | 1 + osdep/mac/app_bridge_objc.h | 4 + osdep/mac/image_conversion.swift | 230 +++++++++++++++++++++++++++++++ 3 files changed, 235 insertions(+) create mode 100644 osdep/mac/image_conversion.swift diff --git a/meson.build b/meson.build index 7572769e0e6f2..92b152cba3071 100644 --- a/meson.build +++ b/meson.build @@ -1563,6 +1563,7 @@ if features['cocoa'] and features['swift'] 'osdep/mac/clipboard.swift', 'osdep/mac/dialog.swift', 'osdep/mac/event_helper.swift', + 'osdep/mac/image_conversion.swift', 'osdep/mac/input_helper.swift', 'osdep/mac/log_helper.swift', 'osdep/mac/menu_bar.swift', diff --git a/osdep/mac/app_bridge_objc.h b/osdep/mac/app_bridge_objc.h index bcde2caf8a6cd..54c3497eb6379 100644 --- a/osdep/mac/app_bridge_objc.h +++ b/osdep/mac/app_bridge_objc.h @@ -18,6 +18,8 @@ #import #import +#include + #include "player/client.h" #include "video/out/libmpv.h" #include "mpv/render_gl.h" @@ -26,10 +28,12 @@ #include "player/core.h" #include "player/clipboard/clipboard.h" #include "common/global.h" +#include "common/msg.h" #include "input/input.h" #include "input/event.h" #include "input/keycodes.h" #include "video/out/win_state.h" +#include "video/sws_utils.h" #include "osdep/main-fn.h" #include "osdep/mac/app_bridge.h" diff --git a/osdep/mac/image_conversion.swift b/osdep/mac/image_conversion.swift new file mode 100644 index 0000000000000..36270274f4d6b --- /dev/null +++ b/osdep/mac/image_conversion.swift @@ -0,0 +1,230 @@ +class ImageConversion { + var global: UnsafeMutablePointer + var log: OpaquePointer + + init(_ g: UnsafeMutablePointer, _ l: OpaquePointer) { + global = g + log = l + } + + private func getColorspaceName(_ plcsp: pl_color_space, gray: Bool) -> CFString? { + if gray { + if plcsp.transfer == PL_COLOR_TRC_LINEAR { + return CGColorSpace.linearGray + } else if plcsp.transfer == PL_COLOR_TRC_GAMMA22 { + return CGColorSpace.genericGrayGamma2_2 + } + } else { + switch plcsp.primaries { + case PL_COLOR_PRIM_DISPLAY_P3: + if plcsp.transfer == PL_COLOR_TRC_BT_1886 { + return CGColorSpace.displayP3 + } else if plcsp.transfer == PL_COLOR_TRC_HLG { + return CGColorSpace.displayP3_HLG + } + case PL_COLOR_PRIM_BT_709: + if plcsp.transfer == PL_COLOR_TRC_LINEAR { + return CGColorSpace.linearSRGB + } else if plcsp.transfer == PL_COLOR_TRC_BT_1886 { + return CGColorSpace.itur_709 + } else if plcsp.transfer == PL_COLOR_TRC_SRGB { + return CGColorSpace.sRGB + } + case PL_COLOR_PRIM_DCI_P3: + if plcsp.transfer == PL_COLOR_TRC_BT_1886 { + return CGColorSpace.dcip3 + } + case PL_COLOR_PRIM_BT_2020: + if plcsp.transfer == PL_COLOR_TRC_BT_1886 { + return CGColorSpace.itur_2020 + } + case PL_COLOR_PRIM_ADOBE: + return CGColorSpace.adobeRGB1998 + case PL_COLOR_PRIM_APPLE: + if plcsp.transfer == PL_COLOR_TRC_LINEAR { + return CGColorSpace.genericRGBLinear + } + default: + break + } + } + + return nil + } + + private func convertIntoRep(_ rep: NSBitmapImageRep, imgfmt: Int32, plcsp: pl_color_space, bps: Int32, image: mp_image) -> Bool { + var image = image + var dest = mp_image() + mp_image_setfmt(&dest, imgfmt) + mp_image_set_size(&dest, image.w, image.h) + + let planes = UnsafeMutablePointer?>.allocate(capacity: 5) + rep.getBitmapDataPlanes(planes) + + if !withUnsafeMutableBytes(of: &dest.stride, { (stridePtr) -> Bool in + return withUnsafeMutableBytes(of: &dest.planes) { (planesPtr) -> Bool in + guard let destStrides = stridePtr.baseAddress?.assumingMemoryBound(to: type(of: image.stride.0)) else { + return false + } + guard let destPlanes = planesPtr.baseAddress?.assumingMemoryBound(to: type(of: image.planes.0)) else { + return false + } + + for i in 0..= 0 + } + + private func createImageRep(_ image: mp_image) -> NSBitmapImageRep? { + // Need it to nominally be mutable to pass to C functions later + var image = image + var imgfmt = image.imgfmt + + var compatible = true + switch imgfmt { + case IMGFMT_YAP8, IMGFMT_YAP16, IMGFMT_Y8, IMGFMT_Y16, IMGFMT_ARGB, IMGFMT_RGBA, IMGFMT_RGB0, IMGFMT_RGBA64: + break + default: + compatible = false + } + + if image.params.repr.levels != PL_COLOR_LEVELS_FULL { + compatible = false + } + + if image.num_planes > 5 { + return nil + } + + let planes = UnsafeMutablePointer?>.allocate(capacity: 5) + planes.initialize(repeating: nil, count: 5) + + var bps = image.fmt.comps.0.size + var spp = mp_imgfmt_desc_get_num_comps(&image.fmt) + let alpha = (image.fmt.flags & MP_IMGFLAG_ALPHA) != 0 + let gray = (image.fmt.flags & MP_IMGFLAG_GRAY) != 0 + let csp: NSColorSpaceName = gray ? .calibratedWhite : .calibratedRGB + var formatFlags: NSBitmapImageRep.Format = [] + if alpha && image.fmt.comps.3.plane == 0 && image.fmt.comps.3.offset == 0 { + formatFlags.insert(.alphaFirst) + } + if image.params.repr.alpha == PL_ALPHA_INDEPENDENT { + formatFlags.insert(.alphaNonpremultiplied) + } + var bpp = image.fmt.bpp.0 + var bytesPerRow = image.stride.0 + var planar = image.num_planes > 1 + + if !withUnsafeBytes(of: &image.stride, { (rawPtr) -> Bool in + let ptr = rawPtr.baseAddress!.assumingMemoryBound(to: type(of: image.stride.0)) + for i in 0.. 8 { + imgfmt = alpha ? IMGFMT_YAP16 : IMGFMT_Y16 + bpp = 16 + } else { + imgfmt = alpha ? IMGFMT_YAP8 : IMGFMT_Y8 + bpp = 8 + } + } else { + if bps > 8 { + imgfmt = IMGFMT_RGBA64 + bpp = 64 + } else { + imgfmt = alpha ? IMGFMT_RGBA : IMGFMT_RGB0 + bpp = 32 + } + } + + bytesPerRow = 0 + planar = (gray && alpha) + spp = (gray ? 1 : 3) + (alpha ? 1 : 0) + } + + guard let rep = NSBitmapImageRep(bitmapDataPlanes: planes, + pixelsWide: Int(image.w), + pixelsHigh: Int(image.h), + bitsPerSample: Int(bps), + samplesPerPixel: Int(spp), + hasAlpha: alpha, + isPlanar: planar, + colorSpaceName: csp, + bitmapFormat: formatFlags, + bytesPerRow: Int(bytesPerRow), + bitsPerPixel: Int(bpp)) else { + return nil + } + + var plcsp = image.params.color + let cgSpaceName = getColorspaceName(plcsp, gray: gray) + + if cgSpaceName == nil { + compatible = false + plcsp.primaries = PL_COLOR_PRIM_BT_709 + plcsp.transfer = PL_COLOR_TRC_SRGB + } + + guard let nscsp = (!gray && image.icc_profile != nil) ? + NSColorSpace(iccProfileData: Data(bytes: image.icc_profile.pointee.data, count: image.icc_profile.pointee.size)) : + (CGColorSpace(name: cgSpaceName ?? CGColorSpace.sRGB).flatMap { NSColorSpace(cgColorSpace: $0) }) else { + return nil + } + + guard let rep = rep.retagging(with: nscsp) else { + return nil + } + + if !compatible && !convertIntoRep(rep, imgfmt: Int32(imgfmt.rawValue), plcsp: plcsp, bps: Int32(bps), image: image) { + return nil + } + + return rep + } + + func convertImage(_ image: mp_image) -> CGImage? { + guard let rep = createImageRep(image) else { + return .none + } + + return rep.cgImage + } +} From 874af0791eea35eac1690be3a79b4ced1a2509af Mon Sep 17 00:00:00 2001 From: rcombs Date: Sun, 16 Mar 2025 23:19:22 +0900 Subject: [PATCH 3/4] [WIP] screenshot PoC utils --- player/client.c | 6 ++++++ player/client.h | 1 + player/screenshot.c | 5 +++++ player/screenshot.h | 2 ++ 4 files changed, 14 insertions(+) diff --git a/player/client.c b/player/client.c index 96bf202e3537b..10888c3d2842a 100644 --- a/player/client.c +++ b/player/client.c @@ -41,6 +41,7 @@ #include "osdep/threads.h" #include "osdep/timer.h" #include "osdep/io.h" +#include "player/screenshot.h" #include "stream/stream.h" #include "command.h" @@ -1334,6 +1335,11 @@ int mpv_set_property(mpv_handle *ctx, const char *name, mpv_format format, return req.status; } +struct mp_image *mp_take_screenshot(mpv_handle *ctx, int mode) +{ + return capture_screenshot(ctx->mpctx, mode); +} + int mpv_del_property(mpv_handle *ctx, const char *name) { const char* args[] = { "del", name, NULL }; diff --git a/player/client.h b/player/client.h index eed1a1cc8a225..f6364fc191876 100644 --- a/player/client.h +++ b/player/client.h @@ -35,6 +35,7 @@ struct mpv_handle *mp_new_client(struct mp_client_api *clients, const char *name void mp_client_set_weak(struct mpv_handle *ctx); struct mp_log *mp_client_get_log(struct mpv_handle *ctx); struct mpv_global *mp_client_get_global(struct mpv_handle *ctx); +struct mp_image *mp_take_screenshot(struct mpv_handle *ctx, int mode); void mp_client_broadcast_event_external(struct mp_client_api *api, int event, void *data); diff --git a/player/screenshot.c b/player/screenshot.c index 8943c155c5342..e30236e6238be 100644 --- a/player/screenshot.c +++ b/player/screenshot.c @@ -422,6 +422,11 @@ static struct mp_image *screenshot_get(struct MPContext *mpctx, int mode, return image; } +struct mp_image *capture_screenshot(struct MPContext *mpctx, int mode) +{ + return screenshot_get(mpctx, mode, false); +} + struct mp_image *convert_image(struct mp_image *image, int destfmt, struct mpv_global *global, struct mp_log *log) { diff --git a/player/screenshot.h b/player/screenshot.h index 97abc79bde550..01ea592b8aa01 100644 --- a/player/screenshot.h +++ b/player/screenshot.h @@ -31,6 +31,8 @@ void screenshot_init(struct MPContext *mpctx); // Called by the playback core on each iteration. void handle_each_frame_screenshot(struct MPContext *mpctx); +struct mp_image *capture_screenshot(struct MPContext *mpctx, int mode); + /* Return the image converted to the given format. If the pixel aspect ratio is * not 1:1, the image is scaled as well. Returns NULL on failure. * If global!=NULL, use command line scaler options etc. From 43bb4ec277f99db29ba4e8db4c243d14c4f937b8 Mon Sep 17 00:00:00 2001 From: rcombs Date: Sun, 16 Mar 2025 23:19:38 +0900 Subject: [PATCH 4/4] [WIP] support Live Text on macOS --- video/out/mac/view.swift | 112 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 2 deletions(-) diff --git a/video/out/mac/view.swift b/video/out/mac/view.swift index 8c9c006f94e57..469963c461712 100644 --- a/video/out/mac/view.swift +++ b/video/out/mac/view.swift @@ -16,14 +16,20 @@ */ import Cocoa +import VisionKit -class View: NSView, CALayerDelegate { +class View: NSView, CALayerDelegate, ImageAnalysisOverlayViewDelegate, EventSubscriber { unowned var common: Common var input: InputHelper? { return common.input } var tracker: NSTrackingArea? var hasMouseDown: Bool = false var lastMouseDownEvent: NSEvent? + private var analyzer: ImageAnalyzer? + private var analyzerConfig: ImageAnalyzer.Configuration? + private var analysisOverlayView: ImageAnalysisOverlayView? + private var isPaused: Bool = false { didSet { updateAnalysis() } } + private var imageConverter: ImageConversion? override var isFlipped: Bool { return true } override var acceptsFirstResponder: Bool { return true } @@ -35,6 +41,86 @@ class View: NSView, CALayerDelegate { wantsBestResolutionOpenGLSurface = true wantsExtendedDynamicRangeOpenGLSurface = true registerForDraggedTypes([ .fileURL, .URL, .string ]) + self.autoresizesSubviews = true + + if #available(macOS 13, *) { + if ImageAnalyzer.isSupported { + analyzer = ImageAnalyzer() + analyzerConfig = ImageAnalyzer.Configuration([.text]) + let overlayView = ImageAnalysisOverlayView() + overlayView.preferredInteractionTypes = [.automaticTextOnly] + overlayView.autoresizingMask = [.width, .height] + overlayView.frame = CGRect(x: 0, y: 0, width: frame.width, height: frame.height) + overlayView.isSupplementaryInterfaceHidden = true + overlayView.delegate = self + analysisOverlayView = overlayView + addSubview(overlayView) + + imageConverter = ImageConversion(mp_client_get_global(AppHub.shared.mpv)!, mp_client_get_log(AppHub.shared.mpv)!) + + setupAnalysisEvents() + } + } + } + + @available(macOS 13, *) + func overlayView(_ overlayView: ImageAnalysisOverlayView, + shouldBeginAt point: CGPoint, + forAnalysisType analysisType: ImageAnalysisOverlayView.InteractionTypes + ) -> Bool { + return true + } + + @available(macOS 13, *) + private func setImageAnalysis(_ analysis: ImageAnalysis?) { + if let analysisView = analysisOverlayView { + analysisView.resetSelection() + analysisView.analysis = isPaused ? analysis : .none + } + } + + @available(macOS 13, *) + private func setupAnalysisEvents() { + if let event = AppHub.shared.event { + event.subscribe(self, event: .init(name: "pause", format: MPV_FORMAT_FLAG)) + event.subscribe(self, event: .init(name: "video-target-params/w", format: MPV_FORMAT_INT64)) + event.subscribe(self, event: .init(name: "video-target-params/h", format: MPV_FORMAT_INT64)) + } + } + + func handle(event: EventHelper.Event) { + switch event.name { + case "pause": isPaused = event.bool ?? false + case "video-target-params/w", "video-target-params/h": if #available(macOS 13, *) { + if let overlayView = analysisOverlayView { + overlayView.setContentsRectNeedsUpdate() + } + updateAnalysis() + } + default: break + } + } + + private func updateAnalysis() { + if #available(macOS 13, *) { + if !isPaused { + setImageAnalysis(.none) + } else { + if let mpv = AppHub.shared.mpv, let a = analyzer, let config = analyzerConfig, let converter = imageConverter { + if let image = mp_take_screenshot(mpv, 1) { + if let converted = converter.convertImage(image.pointee) { + Task { + do { + setImageAnalysis(try await a.analyze(converted, orientation: .up, configuration: config)) + } catch { + setImageAnalysis(.none) + } + } + } + } + } + } + } } required init?(coder: NSCoder) { @@ -89,7 +175,7 @@ class View: NSView, CALayerDelegate { } override func acceptsFirstMouse(for event: NSEvent?) -> Bool { - return true + return false } override func becomeFirstResponder() -> Bool { @@ -105,6 +191,7 @@ class View: NSView, CALayerDelegate { input?.put(key: SWIFT_KEY_MOUSE_ENTER) } common.updateCursorVisibility() + super.mouseEntered(with: event) } override func mouseExited(with event: NSEvent) { @@ -113,27 +200,32 @@ class View: NSView, CALayerDelegate { } common.titleBar?.hide() common.setCursorVisibility(true) + super.mouseExited(with: event) } override func mouseMoved(with event: NSEvent) { signalMouseMovement(event) common.titleBar?.show() + super.mouseMoved(with: event) } override func mouseDragged(with event: NSEvent) { signalMouseMovement(event) + super.mouseDragged(with: event) } override func mouseDown(with event: NSEvent) { hasMouseDown = event.clickCount <= 1 input?.processMouse(event: event) lastMouseDownEvent = event + super.mouseDown(with: event) } override func mouseUp(with event: NSEvent) { hasMouseDown = false common.window?.isMoving = false input?.processMouse(event: event) + super.mouseUp(with: event) } override func rightMouseDown(with event: NSEvent) { @@ -163,6 +255,14 @@ class View: NSView, CALayerDelegate { } func signalMouseMovement(_ event: NSEvent) { + if #available(macOS 13, *) { + if let overlayView = analysisOverlayView, hasMouseDown { + if overlayView.hasInteractiveItem(at: event.locationInWindow) { + return + } + } + } + var point = convert(event.locationInWindow, from: nil) point = convertToBacking(point) point.y = -point.y @@ -176,6 +276,14 @@ class View: NSView, CALayerDelegate { input?.processWheel(event: event) } + func contentView(for overlayView: ImageAnalysisOverlayView) -> NSView? { + return self + } + + func contentsRect(for overlayView: ImageAnalysisOverlayView) -> CGRect { + return CGRect(x: 0.0, y: 0.0, width: 1.0, height: 1.0) + } + func containsMouseLocation() -> Bool { var topMargin: CGFloat = 0.0 let menuBarHeight = NSApp.mainMenu?.menuBarHeight ?? 23.0