Skip to content

[PoC] Live Text image analysis on macOS #16063

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -1563,6 +1563,7 @@ if features['cocoa'] and features['swift']
'osdep/mac/clipboard.swift',
'osdep/mac/dialog.swift',
'osdep/mac/event_helper.swift',
'osdep/mac/image_conversion.swift',
'osdep/mac/input_helper.swift',
'osdep/mac/log_helper.swift',
'osdep/mac/menu_bar.swift',
Expand Down
4 changes: 4 additions & 0 deletions osdep/mac/app_bridge_objc.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#import <Cocoa/Cocoa.h>
#import <QuartzCore/QuartzCore.h>

#include <libswscale/swscale.h>

#include "player/client.h"
#include "video/out/libmpv.h"
#include "mpv/render_gl.h"
Expand All @@ -26,10 +28,12 @@
#include "player/core.h"
#include "player/clipboard/clipboard.h"
#include "common/global.h"
#include "common/msg.h"
#include "input/input.h"
#include "input/event.h"
#include "input/keycodes.h"
#include "video/out/win_state.h"
#include "video/sws_utils.h"

#include "osdep/main-fn.h"
#include "osdep/mac/app_bridge.h"
Expand Down
230 changes: 230 additions & 0 deletions osdep/mac/image_conversion.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
/// Converts mpv `mp_image` frames into AppKit / CoreGraphics image objects.
///
/// Images whose pixel layout, levels, strides and colour space can be described
/// directly by an `NSBitmapImageRep` are wrapped without conversion; everything
/// else is converted with `mp_image_swscale` into a buffer owned by the rep.
class ImageConversion {
    var global: UnsafeMutablePointer<mpv_global>
    var log: OpaquePointer

    /// Number of plane pointers exchanged with `NSBitmapImageRep`
    /// (`getBitmapDataPlanes` fills an array of five pointers).
    private static let repPlaneCount = 5

    /// - Parameters:
    ///   - g: mpv global state, forwarded to `mp_image_swscale`.
    ///   - l: mpv log handle, forwarded to `mp_image_swscale`.
    init(_ g: UnsafeMutablePointer<mpv_global>, _ l: OpaquePointer) {
        global = g
        log = l
    }

    /// Maps a libplacebo colour space onto a named CoreGraphics colour space.
    ///
    /// - Parameters:
    ///   - plcsp: Primaries/transfer pair to look up.
    ///   - gray: Whether the image is grayscale; only the transfer is consulted then.
    /// - Returns: The matching `CGColorSpace` name, or `nil` when the combination
    ///   has no entry in the table below.
    private func getColorspaceName(_ plcsp: pl_color_space, gray: Bool) -> CFString? {
        let transfer = plcsp.transfer

        if gray {
            if transfer == PL_COLOR_TRC_LINEAR {
                return CGColorSpace.linearGray
            }
            if transfer == PL_COLOR_TRC_GAMMA22 {
                return CGColorSpace.genericGrayGamma2_2
            }
            return nil
        }

        switch plcsp.primaries {
        case PL_COLOR_PRIM_DISPLAY_P3:
            if transfer == PL_COLOR_TRC_BT_1886 {
                return CGColorSpace.displayP3
            } else if transfer == PL_COLOR_TRC_HLG {
                return CGColorSpace.displayP3_HLG
            }
        case PL_COLOR_PRIM_BT_709:
            if transfer == PL_COLOR_TRC_LINEAR {
                return CGColorSpace.linearSRGB
            } else if transfer == PL_COLOR_TRC_BT_1886 {
                return CGColorSpace.itur_709
            } else if transfer == PL_COLOR_TRC_SRGB {
                return CGColorSpace.sRGB
            }
        case PL_COLOR_PRIM_DCI_P3:
            if transfer == PL_COLOR_TRC_BT_1886 {
                return CGColorSpace.dcip3
            }
        case PL_COLOR_PRIM_BT_2020:
            if transfer == PL_COLOR_TRC_BT_1886 {
                return CGColorSpace.itur_2020
            }
        case PL_COLOR_PRIM_ADOBE:
            // Adobe primaries are mapped regardless of the transfer function.
            return CGColorSpace.adobeRGB1998
        case PL_COLOR_PRIM_APPLE:
            if transfer == PL_COLOR_TRC_LINEAR {
                return CGColorSpace.genericRGBLinear
            }
        default:
            break
        }

        return nil
    }

    /// Converts `image` into the pixel storage of `rep` using `mp_image_swscale`.
    ///
    /// A temporary `mp_image` describing the rep's buffers (format `imgfmt`,
    /// full-range RGB, colour space `plcsp`, `bps` bits per sample) is used as
    /// the conversion destination.
    ///
    /// - Returns: `true` when `mp_image_swscale` reports success (result >= 0).
    private func convertIntoRep(_ rep: NSBitmapImageRep, imgfmt: Int32, plcsp: pl_color_space, bps: Int32, image: mp_image) -> Bool {
        // Needs to be mutable to be passed to the C conversion function.
        var image = image
        var dest = mp_image()
        mp_image_setfmt(&dest, imgfmt)
        mp_image_set_size(&dest, image.w, image.h)

        // Scratch array for the rep's plane pointers. It is only needed while
        // the pointers are copied into `dest`, so release it on every exit path.
        let planeCapacity = ImageConversion.repPlaneCount
        let planes = UnsafeMutablePointer<UnsafeMutablePointer<UInt8>?>.allocate(capacity: planeCapacity)
        planes.initialize(repeating: nil, count: planeCapacity)
        defer {
            planes.deinitialize(count: planeCapacity)
            planes.deallocate()
        }
        rep.getBitmapDataPlanes(planes)

        // Point every destination plane at the rep's storage ...
        withUnsafeMutableBytes(of: &dest.planes) { (raw) -> Void in
            let destPlanes = raw.bindMemory(to: UnsafeMutablePointer<UInt8>?.self)
            for i in 0..<min(destPlanes.count, planeCapacity) {
                destPlanes[i] = planes[i]
            }
        }

        // ... and give every plane the rep's row stride.
        let rowStride = Int32(rep.bytesPerRow)
        withUnsafeMutableBytes(of: &dest.stride) { (raw) -> Void in
            let destStrides = raw.bindMemory(to: Int32.self)
            for i in destStrides.indices {
                destStrides[i] = rowStride
            }
        }

        dest.params.repr.sys = PL_COLOR_SYSTEM_RGB
        dest.params.repr.levels = PL_COLOR_LEVELS_FULL
        if rep.hasAlpha {
            dest.params.repr.alpha = rep.bitmapFormat.contains(.alphaNonpremultiplied)
                ? PL_ALPHA_INDEPENDENT
                : PL_ALPHA_PREMULTIPLIED
        } else {
            dest.params.repr.alpha = PL_ALPHA_UNKNOWN
        }
        dest.params.repr.bits.sample_depth = bps
        dest.params.repr.bits.color_depth = bps

        dest.params.color = plcsp

        return mp_image_swscale(&dest, &image, global, log) >= 0
    }

    /// Builds an `NSBitmapImageRep` for `image`.
    ///
    /// When the image is directly representable the rep wraps the image's own
    /// planes; otherwise the rep allocates its own storage and the image is
    /// converted into it.
    ///
    /// NOTE(review): in the wrapping case the rep refers to the pixel memory of
    /// `image` without owning it — confirm the caller keeps that memory alive for
    /// as long as the returned rep (or any `CGImage` derived from it) is in use.
    ///
    /// - Returns: The rep, or `nil` if the image has too many planes, the rep or
    ///   its colour space cannot be created, or the conversion fails.
    private func createImageRep(_ image: mp_image) -> NSBitmapImageRep? {
        // Need it to nominally be mutable to pass to C functions later
        var image = image
        var imgfmt = image.imgfmt

        var compatible = true
        switch imgfmt {
        case IMGFMT_YAP8, IMGFMT_YAP16, IMGFMT_Y8, IMGFMT_Y16, IMGFMT_ARGB, IMGFMT_RGBA, IMGFMT_RGB0, IMGFMT_RGBA64:
            break
        default:
            compatible = false
        }

        if image.params.repr.levels != PL_COLOR_LEVELS_FULL {
            compatible = false
        }

        if image.num_planes > 5 {
            return nil
        }
        let planeCount = Int(image.num_planes)

        var bps = image.fmt.comps.0.size
        var spp = mp_imgfmt_desc_get_num_comps(&image.fmt)
        let alpha = (image.fmt.flags & MP_IMGFLAG_ALPHA) != 0
        let gray = (image.fmt.flags & MP_IMGFLAG_GRAY) != 0
        let csp: NSColorSpaceName = gray ? .calibratedWhite : .calibratedRGB
        var formatFlags: NSBitmapImageRep.Format = []
        if alpha && image.fmt.comps.3.plane == 0 && image.fmt.comps.3.offset == 0 {
            formatFlags.insert(.alphaFirst)
        }
        if image.params.repr.alpha == PL_ALPHA_INDEPENDENT {
            formatFlags.insert(.alphaNonpremultiplied)
        }
        var bpp = image.fmt.bpp.0
        var bytesPerRow = image.stride.0
        var planar = image.num_planes > 1

        // A rep has a single bytesPerRow, so every used plane must share the
        // stride of plane 0 for the image to be wrapped directly.
        let strides: [Int32] = withUnsafeBytes(of: image.stride) { Array($0.bindMemory(to: Int32.self)) }
        if !strides.prefix(planeCount).allSatisfy({ $0 == bytesPerRow }) {
            compatible = false
        }

        // Decide colour-space compatibility BEFORE choosing the buffer strategy:
        // if a conversion is required the rep must own its storage, otherwise
        // the conversion would write into the source image's planes.
        var plcsp = image.params.color
        let cgSpaceName = getColorspaceName(plcsp, gray: gray)
        if cgSpaceName == nil {
            compatible = false
            plcsp.primaries = PL_COLOR_PRIM_BT_709
            plcsp.transfer = PL_COLOR_TRC_SRGB
        }

        // Plane pointers handed to the rep initializer. All-nil entries make the
        // rep allocate its own buffer. The array itself is only scratch space and
        // is released when this function returns.
        let planeCapacity = ImageConversion.repPlaneCount
        let planes = UnsafeMutablePointer<UnsafeMutablePointer<UInt8>?>.allocate(capacity: planeCapacity)
        planes.initialize(repeating: nil, count: planeCapacity)
        defer {
            planes.deinitialize(count: planeCapacity)
            planes.deallocate()
        }

        if compatible {
            // Wrap the image's planes as they are.
            let sourcePlanes: [UnsafeMutablePointer<UInt8>?] = withUnsafeBytes(of: image.planes) {
                Array($0.bindMemory(to: UnsafeMutablePointer<UInt8>?.self))
            }
            for (i, plane) in sourcePlanes.prefix(planeCount).enumerated() {
                planes[i] = plane
            }

            if bpp == 24 {
                bpp = 32
            }
        } else {
            // Pick the closest format a rep can hold and let it allocate storage.
            bps = bps <= 8 ? 8 : 16
            formatFlags.remove(.alphaFirst)
            if gray {
                if bps > 8 {
                    imgfmt = alpha ? IMGFMT_YAP16 : IMGFMT_Y16
                    bpp = 16
                } else {
                    imgfmt = alpha ? IMGFMT_YAP8 : IMGFMT_Y8
                    bpp = 8
                }
            } else {
                if bps > 8 {
                    imgfmt = IMGFMT_RGBA64
                    bpp = 64
                } else {
                    imgfmt = alpha ? IMGFMT_RGBA : IMGFMT_RGB0
                    bpp = 32
                }
            }

            bytesPerRow = 0
            planar = (gray && alpha)
            spp = (gray ? 1 : 3) + (alpha ? 1 : 0)
        }

        guard let baseRep = NSBitmapImageRep(bitmapDataPlanes: planes,
                                             pixelsWide: Int(image.w),
                                             pixelsHigh: Int(image.h),
                                             bitsPerSample: Int(bps),
                                             samplesPerPixel: Int(spp),
                                             hasAlpha: alpha,
                                             isPlanar: planar,
                                             colorSpaceName: csp,
                                             bitmapFormat: formatFlags,
                                             bytesPerRow: Int(bytesPerRow),
                                             bitsPerPixel: Int(bpp)) else {
            return nil
        }

        // Prefer an embedded ICC profile for colour images; otherwise use the
        // named CoreGraphics colour space (sRGB when none matched).
        let colorSpace: NSColorSpace?
        if !gray && image.icc_profile != nil {
            colorSpace = NSColorSpace(iccProfileData: Data(bytes: image.icc_profile.pointee.data, count: image.icc_profile.pointee.size))
        } else {
            colorSpace = CGColorSpace(name: cgSpaceName ?? CGColorSpace.sRGB).flatMap { NSColorSpace(cgColorSpace: $0) }
        }
        guard let nscsp = colorSpace else {
            return nil
        }

        guard let taggedRep = baseRep.retagging(with: nscsp) else {
            return nil
        }

        if !compatible && !convertIntoRep(taggedRep, imgfmt: Int32(imgfmt.rawValue), plcsp: plcsp, bps: Int32(bps), image: image) {
            return nil
        }

        return taggedRep
    }

    /// Converts an mpv image into a `CGImage`.
    ///
    /// - Returns: The image, or `nil` if no bitmap rep could be created for
    ///   `image` or the rep cannot produce a `CGImage`.
    func convertImage(_ image: mp_image) -> CGImage? {
        guard let rep = createImageRep(image) else {
            return nil
        }

        return rep.cgImage
    }
}
6 changes: 6 additions & 0 deletions player/client.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "osdep/threads.h"
#include "osdep/timer.h"
#include "osdep/io.h"
#include "player/screenshot.h"
#include "stream/stream.h"

#include "command.h"
Expand Down Expand Up @@ -1334,6 +1335,11 @@ int mpv_set_property(mpv_handle *ctx, const char *name, mpv_format format,
return req.status;
}

/* Capture a screenshot via the player context attached to the client handle
 * `ctx`, forwarding `mode` unchanged to capture_screenshot(). The result of
 * capture_screenshot() is returned as-is.
 * NOTE(review): `ctx` is dereferenced without a NULL check, and this touches
 * the player context directly from the caller - confirm callers run where
 * that is permitted. */
struct mp_image *mp_take_screenshot(mpv_handle *ctx, int mode)
{
    struct MPContext *mpctx = ctx->mpctx;

    return capture_screenshot(mpctx, mode);
}

int mpv_del_property(mpv_handle *ctx, const char *name)
{
const char* args[] = { "del", name, NULL };
Expand Down
1 change: 1 addition & 0 deletions player/client.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ struct mpv_handle *mp_new_client(struct mp_client_api *clients, const char *name
void mp_client_set_weak(struct mpv_handle *ctx);
struct mp_log *mp_client_get_log(struct mpv_handle *ctx);
struct mpv_global *mp_client_get_global(struct mpv_handle *ctx);
struct mp_image *mp_take_screenshot(struct mpv_handle *ctx, int mode);

void mp_client_broadcast_event_external(struct mp_client_api *api, int event,
void *data);
Expand Down
5 changes: 5 additions & 0 deletions player/screenshot.c
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,11 @@ static struct mp_image *screenshot_get(struct MPContext *mpctx, int mode,
return image;
}

/* Non-static entry point around screenshot_get(): requests a screenshot for
 * `mode` with the third screenshot_get() argument fixed to false, and returns
 * whatever screenshot_get() produced. */
struct mp_image *capture_screenshot(struct MPContext *mpctx, int mode)
{
    struct mp_image *image = screenshot_get(mpctx, mode, false);

    return image;
}

struct mp_image *convert_image(struct mp_image *image, int destfmt,
struct mpv_global *global, struct mp_log *log)
{
Expand Down
2 changes: 2 additions & 0 deletions player/screenshot.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ void screenshot_init(struct MPContext *mpctx);
// Called by the playback core on each iteration.
void handle_each_frame_screenshot(struct MPContext *mpctx);

struct mp_image *capture_screenshot(struct MPContext *mpctx, int mode);

/* Return the image converted to the given format. If the pixel aspect ratio is
* not 1:1, the image is scaled as well. Returns NULL on failure.
* If global!=NULL, use command line scaler options etc.
Expand Down
Loading
Loading