dottxt-ai
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎benchmarks/__init__.py‎
Lines changed: 13 additions & 0 deletions b/‎benchmarks/__init__.py‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎benchmarks/asv.conf.json‎
Lines changed: 8 additions & 3 deletions b/‎benchmarks/asv.conf.json‎
Lines changed: 8 additions & 3 deletions
diff --git a/‎benchmarks/bench_kernels.py‎
Lines changed: 116 additions & 0 deletions b/‎benchmarks/bench_kernels.py‎
Lines changed: 116 additions & 0 deletions
diff --git a/‎benchmarks/bench_regex_guide.py‎
Lines changed: 19 additions & 0 deletions b/‎benchmarks/bench_regex_guide.py‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎benchmarks/bench_torch_e2e.py‎
Lines changed: 38 additions & 0 deletions b/‎benchmarks/bench_torch_e2e.py‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎outlines_core/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎outlines_core/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎outlines_core/json_schema.py‎
Lines changed: 16 additions & 0 deletions b/‎outlines_core/json_schema.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎outlines_core/kernels/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎outlines_core/kernels/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎outlines_core/kernels/mlx.py‎
Lines changed: 140 additions & 0 deletions b/‎outlines_core/kernels/mlx.py‎
Lines changed: 140 additions & 0 deletions
@@ -13,6 +13,7 @@ __pycache__
 *.pyd
 *.so
 benchmarks/results
+benchmarks/env
 build
 Cargo.lock
 dist
 
@@ -0,0 +1,13 @@
+import sys
+
+# There is a conflict between asv.statistics and the standard library's statistics module.
+# This is a workaround to use the standard library's median function.
+# The patch is only applied when `asv.statistics` has already been imported;
+# otherwise importing this package changes nothing.
+if "asv.statistics" in sys.modules:
+
+    def median(data):
+        """Return the median of `data` as computed by the standard library."""
+        # Imported at call time, not when this package is imported.
+        import statistics
+
+        return statistics.median(data)
+
+    # Replace the `median` attribute of the loaded asv module with the wrapper above.
+    asv_statistics = sys.modules["asv.statistics"]
+    asv_statistics.median = median  # type: ignore
@@ -4,12 +4,17 @@
     "project_url": "https://dottxt-ai.github.io/outlines-core/",
     "repo": "..",
     "branches": [
-	      "HEAD",
+	      "HEAD"
     ],
     "build_command": [
-        "python -mpip install .[test]",
-        "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}",
+        "python -m pip install .[test]",
+        "PIP_NO_BUILD_ISOLATION=false python -m pip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
     ],
+    "matrix": {
+        "torch": ["2.4.0"],
+        "numpy": ["2.2.3"],
+        "numba": ["0.60.0"]
+    },
     "environment_type": "virtualenv",
     "show_commit_url": "https://github.com/dottxt-ai/outlines-core/commit/",
     "benchmark_dir": ".",
 
@@ -0,0 +1,116 @@
+import random
+
+import numpy as np
+import torch
+
+from outlines_core.kernels.numpy import (
+    _apply_token_bitmask_inplace_kernel as numpy_kernel,
+)
+from outlines_core.kernels.torch import (
+    _apply_token_bitmask_inplace_kernel as torch_kernel,
+)
+
+
+def generate_sparse_mask(batch, vocab, allowed_count=1000):
+    """
+    Build a random packed token bitmask with `allowed_count` allowed tokens per row.
+
+    Token `i` of a row is stored as bit `i % 32` of the 32-bit word `i // 32`;
+    a set bit marks the token as allowed.
+
+    Arguments:
+        batch (int): Number of rows in the mask.
+        vocab (int): Vocabulary size, i.e. the number of token bits per row.
+        allowed_count (int): Number of distinct tokens allowed in every row.
+
+    Raises:
+        ValueError: If `allowed_count` is negative or larger than `vocab`
+          (raised by `random.sample`).
+
+    Returns:
+        np.ndarray: A `np.uint32` array of shape `(batch, (vocab + 31) // 32)`.
+    """
+    mask = np.zeros((batch, (vocab + 31) // 32), dtype=np.uint32)
+    # Every row gets its own sample: filling only row 0 would leave all other
+    # rows without a single allowed token whenever `batch > 1`.
+    for row in mask:
+        allowed = np.asarray(
+            random.sample(range(vocab), allowed_count), dtype=np.int64
+        )
+        bits = np.left_shift(np.uint32(1), (allowed % 32).astype(np.uint32))
+        # `ufunc.at` accumulates correctly when several tokens share a word,
+        # which plain fancy-index assignment would not.
+        np.bitwise_or.at(row, allowed // 32, bits)
+    return mask
+
+
+class TorchBitmaskApplyBenchmark:
+    """Time the torch bitmask kernel on CPU over mask densities and batch sizes."""
+
+    param_names = ["allowed_tokens", "batch"]
+    params = [[10, 100, 1_000, 10_000, 100_000], [1, 2, 4, 8]]
+    number = 10
+
+    def setup(self, allowed_tokens, batch):
+        """Create random logits and a packed mask, then run a few untimed calls."""
+        self.device = "cpu"
+        self.vocab = 128000
+        self.batch = batch
+        self.allowed_tokens = allowed_tokens
+
+        self.logits = torch.randn(batch, self.vocab, device=self.device)
+
+        packed = generate_sparse_mask(batch, self.vocab, allowed_count=allowed_tokens)
+        self.mask = torch.from_numpy(packed).to(self.device)
+
+        self.kernel = torch_kernel
+
+        # Warm-up calls, executed before the timed section.
+        warmup_rounds = 4
+        for _ in range(warmup_rounds):
+            self.kernel(self.logits, self.mask)
+
+    def time_kernel(self, allowed_tokens, batch):
+        """Timed section: a single kernel call on the prepared inputs."""
+        self.kernel(self.logits, self.mask)
+
+
+class NumpyBitmaskApplyBenchmark:
+    """Time the numpy bitmask kernel over mask densities and batch sizes."""
+
+    param_names = ["allowed_tokens", "batch"]
+    params = [[10, 100, 1_000, 10_000, 100_000], [1, 2, 4, 8]]
+    number = 10
+
+    def setup(self, allowed_tokens, batch):
+        """Create float32 logits and a packed mask, then run a few untimed calls."""
+        self.vocab = 128000
+        self.batch = batch
+        self.allowed_tokens = allowed_tokens
+
+        random_logits = np.random.randn(batch, self.vocab)
+        self.logits = random_logits.astype(np.float32)
+
+        self.mask = generate_sparse_mask(
+            batch, self.vocab, allowed_count=allowed_tokens
+        )
+
+        self.kernel = numpy_kernel
+
+        # Warm-up calls, executed before the timed section.
+        warmup_rounds = 4
+        for _ in range(warmup_rounds):
+            self.kernel(self.logits, self.mask)
+
+    def time_kernel(self, allowed_tokens, batch):
+        """Timed section: a single kernel call on the prepared inputs."""
+        self.kernel(self.logits, self.mask)
+
+
+class MlxBitmaskApplyBenchmark:
+    """Time the MLX bitmask kernel over mask densities and batch sizes."""
+
+    params = [[10, 100, 1_000, 10_000, 100_000], [1, 2, 4, 8]]
+    param_names = ["allowed_tokens", "batch"]
+    number = 10
+
+    def setup(self, allowed_tokens, batch):
+        """
+        Create MLX logits and a packed mask, then warm the kernel up.
+
+        Raises:
+            NotImplementedError: If `mlx` or the MLX kernel module cannot be
+              imported, so the benchmark is not run on unsupported machines.
+        """
+        try:
+            import mlx.core as mx
+
+            from outlines_core.kernels.mlx import (
+                _apply_token_bitmask_kernel as mlx_kernel,
+            )
+        except ImportError:
+            raise NotImplementedError
+
+        # Keep a handle on the module: `time_kernel` needs `mx.eval` and `mlx`
+        # is deliberately not imported at module level.
+        self.mx = mx
+
+        self.allowed_tokens = allowed_tokens
+        self.vocab = 128000
+        self.batch = batch
+
+        self.logits = mx.array(
+            np.random.randn(self.batch, self.vocab).astype(np.float32)
+        )
+
+        self.mask = mx.array(
+            generate_sparse_mask(
+                self.batch, self.vocab, allowed_count=self.allowed_tokens
+            )
+        )
+
+        self.kernel = mlx_kernel
+
+        # warm up / compile
+        # MLX is lazy: without `mx.eval` nothing is computed, so the warm-up
+        # would neither materialise the inputs nor launch the kernel.
+        for _ in range(4):
+            mx.eval(self.kernel(self.logits, self.mask))
+
+    def time_kernel(self, allowed_tokens, batch):
+        """Timed section: one kernel call, forced to run to completion."""
+        # Force evaluation; otherwise only graph construction is timed.
+        self.mx.eval(self.kernel(self.logits, self.mask))
@@ -2,6 +2,7 @@
 from concurrent.futures import ThreadPoolExecutor
 
 import psutil
+
 from outlines_core import Guide, Index, Vocabulary
 
 regex_samples = {
@@ -83,3 +84,21 @@ def peakmem_guides_per_index(self, num_guides):
 
         assert len(objects) == num_guides
         assert final - initial < 5
+
+
+class WriteMaskIntoBenchmark:
+    """Time `Guide.write_mask_into` for each entry of `regex_samples`."""
+
+    param_names = ["regex_key"]
+    params = list(regex_samples)
+
+    def setup(self, regex_key):
+        """Build the vocabulary, the bitmask buffer, the index and the guide."""
+        from outlines_core.kernels.torch import allocate_token_bitmask
+
+        vocabulary = Vocabulary.from_pretrained("gpt2")
+        self.vocab = vocabulary
+        self.mask = allocate_token_bitmask(len(vocabulary))
+        self.index = Index(regex_samples[regex_key], vocabulary)
+        self.guide = Guide(self.index)
+
+    def time_write_mask_into(self, regex_key):
+        """Timed section: write the guide's current mask into the buffer."""
+        bitmask = self.mask
+        self.guide.write_mask_into(
+            bitmask.data_ptr(), bitmask.numel(), bitmask.element_size()
+        )
@@ -0,0 +1,38 @@
+import torch
+
+from outlines_core import Guide, Index, Vocabulary
+from outlines_core.kernels.torch import (
+    _apply_token_bitmask_inplace_kernel,
+    allocate_token_bitmask,
+)
+
+# Named regular expressions used as benchmark inputs: the keys are the
+# parameter values of `TorchE2EBenchmark`, the values are the patterns compiled
+# into an `Index`.
+regex_samples = {
+    "email": r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
+    "complex_phone": "\\+?\\d{1,4}?[-.\\s]?\\(?\\d{1,3}?\\)?[-.\\s]?\\d{1,4}[-.\\s]?\\d{1,4}[-.\\s]?\\d{1,9}",
+    "simple_phone": "\\+?[1-9][0-9]{7,14}",
+    "date": r"([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])(\.|-|/)([1-9]|0[1-9]|1[0-2])(\.|-|/)([0-9][0-9]|19[0-9][0-9]|20[0-9][0-9])|([0-9][0-9]|19[0-9][0-9]|20[0-9][0-9])(\.|-|/)([1-9]|0[1-9]|1[0-2])(\.|-|/)([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])",
+    "time": r"(0?[1-9]|1[0-2]):[0-5]\d\s?(am|pm)?",
+    "ip": r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)",
+    "url": r"(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?",
+    "ssn": r"\d{3}-\d{2}-\d{4}",
+    "complex_span_constrained_relation_extraction": "(['\"\\ ,]?((?:of|resulting|case|which|cultures|a|core|extreme|selflessness|spiritual|various|However|both|vary|in|other|secular|the|religious|among|moral|and|It|object|worldviews|altruism|traditional|material|aspect|or|life|beings|virtue|is|however|opposite|concern|an|practice|it|for|s|quality|religions|In|Altruism|animals|happiness|many|become|principle|human|selfishness|may|synonym)['\"\\ ,]?)+['\"\\ ,]?\\s\\|\\s([^|\\(\\)\n]{1,})\\s\\|\\s['\"\\ ,]?((?:of|resulting|case|which|cultures|a|core|extreme|selflessness|spiritual|various|However|both|vary|in|other|secular|the|religious|among|moral|and|It|object|worldviews|altruism|traditional|material|aspect|or|life|beings|virtue|is|however|opposite|concern|an|practice|it|for|s|quality|religions|In|Altruism|animals|happiness|many|become|principle|human|selfishness|may|synonym)['\"\\ ,]?)+['\"\\ ,]?(\\s\\|\\s\\(([^|\\(\\)\n]{1,})\\s\\|\\s([^|\\(\\)\n]{1,})\\))*\\n)*",
+}
+
+
+class TorchE2EBenchmark:
+    """Time one masking step end to end: write the guide's mask, then apply it."""
+
+    # Snapshot the keys into a list (instead of keeping a live `dict_keys`
+    # view) and name the parameter, as the other regex benchmark does.
+    params = list(regex_samples.keys())
+    param_names = ["pattern_name"]
+
+    def setup(self, pattern_name):
+        """Build the guide for `pattern_name` plus the mask and logits buffers."""
+        self.vocabulary = Vocabulary.from_pretrained("gpt2")
+        self.pattern = regex_samples[pattern_name]
+        self.guide = Guide(Index(self.pattern, self.vocabulary))
+
+        self.mask = allocate_token_bitmask(len(self.vocabulary))
+        self.logits = torch.randn(1, len(self.vocabulary))
+
+    def time_write_mask_and_apply(self, pattern_name):
+        """Timed section: fill the bitmask from the guide and mask the logits."""
+        self.guide.write_mask_into(
+            self.mask.data_ptr(), self.mask.numel(), self.mask.element_size()
+        )
+
+        _apply_token_bitmask_inplace_kernel(self.logits, self.mask)
@@ -0,0 +1 @@
+from .outlines_core import Guide, Index, Vocabulary
@@ -0,0 +1,16 @@
+from .outlines_core import (  # noqa: F401
+    BOOLEAN,
+    DATE,
+    DATE_TIME,
+    EMAIL,
+    INTEGER,
+    NULL,
+    NUMBER,
+    STRING,
+    STRING_INNER,
+    TIME,
+    URI,
+    UUID,
+    WHITESPACE,
+    build_regex_from_schema,
+)
@@ -0,0 +1 @@
+"""Token Masking kernel implementations for various backends."""
@@ -0,0 +1,140 @@
+from outlines_core import Guide
+
+try:
+    import mlx.core as mx
+    import numpy as np
+except ImportError as e:
+    missing_dep = "numpy" if "numpy" in str(e) else "mlx"
+    raise ImportError(
+        f"To use the kernels in `outlines_core.kernels.mlx`, {missing_dep} must be installed. You can install it with `pip install {missing_dep}`"
+    ) from e
+
+
+def allocate_token_bitmask(vocab_size: int) -> np.ndarray:
+    """
+    Allocate a single-row token bitmask with every bit set.
+
+    Arguments:
+        vocab_size (int): Number of tokens the mask has to cover.
+
+    Returns:
+        np.ndarray: An `np.int32` array of shape `(1, (vocab_size + 31) // 32)`
+          filled with -1, i.e. with all 32 bits of every word set.
+    """
+    # One 32-bit word per 32 tokens, rounded up.
+    words_per_row = (vocab_size + 31) // 32
+    return np.full(shape=(1, words_per_row), fill_value=-1, dtype=np.int32)
+
+
+# Body of the Metal kernel. `inp`, `mask` and `out` are the names declared in
+# `input_names` / `output_names` below.
+# NOTE(review): `inp_shape` / `mask_shape` are not defined in this file;
+# presumably they are supplied by `mx.fast.metal_kernel` - confirm in MLX docs.
+#
+# One thread per logit: `thread_position_in_grid.y` selects the batch row and
+# `.x` the token. Token `elem` is looked up as bit `elem & 31` of mask word
+# `elem >> 5` in the same row; a token whose word index is not below
+# `mask_shape[1]` is treated as disallowed. Allowed tokens keep their input
+# value, every other position is written as -INFINITY.
+_KERNEL_SOURCE = r"""
+// Batch index
+uint batch = thread_position_in_grid.y;
+// Element index
+uint elem = thread_position_in_grid.x;
+
+uint bit = ((elem >> 5) < mask_shape[1]) &&
+            ((mask[batch * mask_shape[1] + (elem >> 5)] >> (elem & 31)) & 1);
+
+out[batch * inp_shape[1] + elem] = bit ? inp[batch * inp_shape[1] + elem] : -INFINITY;
+"""
+
+# Kernel object built once at import time from the source above.
+_KERNEL = mx.fast.metal_kernel(
+    name="bitmask_apply_batched",
+    input_names=["inp", "mask"],
+    output_names=["out"],
+    source=_KERNEL_SOURCE,
+)
+
+
+@mx.compile
+def _apply_token_bitmask_kernel(data: mx.array, mask: mx.array) -> mx.array:
+    """
+    Launch the Metal bitmask kernel on `data` and return its single output.
+
+    Arguments:
+        data (mx.array): The logits; `data.shape[0]` and `data.shape[1]` are
+          used as the batch and token extents of the launch grid.
+        mask (mx.array): The packed token bitmask passed to the kernel.
+
+    Returns:
+        mx.array: The kernel output, with the shape and dtype of `data`.
+    """
+    batch_size = data.shape[0]
+    vocab_size = data.shape[1]
+    # One thread per logit: x spans the tokens, y spans the batch rows.
+    outputs = _KERNEL(
+        inputs=[data, mask],
+        template=[("T", data.dtype)],
+        grid=(vocab_size, batch_size, 1),
+        threadgroup=(256, 1, 1),
+        output_shapes=[data.shape],
+        output_dtypes=[data.dtype],
+    )
+    return outputs[0]
+
+
+def apply_token_bitmask(logits: mx.array, mask_np: np.ndarray) -> mx.array:
+    """
+    Apply a token bitmask to `logits`, setting the logits of invalid tokens
+    to -infinity.
+
+    This function is not in-place: the result is written to a new array that
+    is returned, and `mask_np` is copied into an `mx.array` before use.
+
+    Arguments:
+        logits (mx.array): The logits, either 2D or 1D. A 1D array is
+          expanded to a single row before the mask is applied.
+
+        mask_np (np.ndarray): The token bitmask representing the validity of
+          each token in `logits`, either 2D or 1D. A 1D array is expanded to
+          a single row.
+
+    Raises:
+        ValueError: If any of the following conditions are not met:
+            - the converted mask has dtype `mx.int32`
+            - the mask is a 2D array (after expansion of 1D input)
+            - `logits` is a 2D array (after expansion of 1D input)
+            - `mask.shape[0]` matches `logits.shape[0]`
+
+    Returns:
+        mx.array: A new 2D array with the shape and dtype of the (possibly
+          expanded) `logits`.
+    """
+    # makes a copy - non consuming
+    mask = mx.array(mask_np)
+
+    # Promote 1D inputs to a single-row batch.
+    logits = logits if len(logits.shape) != 1 else mx.expand_dims(logits, axis=0)
+    mask = mask if len(mask.shape) != 1 else mx.expand_dims(mask, axis=0)
+
+    if mask.dtype != mx.int32:
+        raise ValueError(
+            f"Invalid mask dtype: Expected `np.int32`, but got `{mask.dtype}`."
+        )
+    elif len(mask.shape) != 2:
+        raise ValueError(
+            f"Invalid mask dimensions: Expected a 2D array, but got {mask.ndim}D."
+        )
+    elif len(logits.shape) != 2:
+        raise ValueError(
+            f"Invalid logits dimensions: Expected a 2D array, but got {logits.ndim}D."
+        )
+    elif mask.shape[0] != logits.shape[0]:
+        # Only the batch dimension is compared; the second dimensions differ
+        # by design (one mask word per 32 tokens).
+        raise ValueError(
+            f"Invalid batch size: Expected `mask.shape[0]` ({mask.shape[0]}) to match `logits.shape[0]` ({logits.shape[0]})."
+        )
+    return _apply_token_bitmask_kernel(logits, mask)
+
+
+def fill_next_token_bitmask(guide: Guide, mask: np.ndarray) -> None:
+    """
+    Writes a bitmask to represent the tokens permissible by the current state of the `guide`.
+    Each bit in the bitmask corresponds to a token ID, with a bit value of 1 indicating that
+    the token is allowed and 0 indicating that it is disallowed. This function directly modifies
+    the `mask` array in-place.
+
+    Arguments:
+        guide (Guide): An instance of the `Guide` class that provides the current guidance state.
+        mask (np.ndarray): A 2D array of type `np.int32` where the bitmask will be written.
+                           The array must be C-contiguous and have a single batch dimension
+                           (shape[0] == 1).
+
+    Raises:
+        ValueError: If any of the following conditions are not met:
+                    - `mask.dtype` is `np.int32`
+                    - `mask` is a 2D array
+                    - `mask` has a single batch dimension (shape[0] == 1)
+                    - `mask` is C-contiguous in memory
+
+    Returns:
+        None: Modifies the `mask` array in-place.
+    """
+    if mask.dtype != np.int32:
+        raise ValueError(
+            f"Invalid mask dtype: Expected `np.int32`, but got `{mask.dtype}`."
+        )
+    elif mask.ndim != 2:
+        raise ValueError(
+            f"Invalid mask dimensions: Expected a 2D array, but got {mask.ndim}D."
+        )
+    elif mask.shape[0] != 1:
+        raise ValueError(
+            f"Invalid batch size: Batch mask writes are not supported. Expected shape[0] == 1, but got shape {mask.shape}."
+        )
+    elif not mask.flags["C_CONTIGUOUS"]:
+        raise ValueError(
+            "Mask array must be contiguous in memory. Use `np.ascontiguousarray(mask)`."
+        )
+
+    # The guide receives the raw buffer address, the element count and the
+    # element size in bytes, and writes into the array's memory directly.
+    return guide.write_mask_into(mask.ctypes.data, mask.size, mask.itemsize)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+from .outlines_core import Guide, Index, Vocabulary`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+"""Token Masking kernel implementations for various backends."""`