Integer -> Union{Integer,StaticInt} (#86)

Tokazama · chriselrod · web-flow · commit c723b7a2aae1 · 2022-06-03T06:30:19.000-04:00
* Integer -> Union{Integer,StaticInt}

* Update src/cartesianvindex.jl

Co-authored-by: Chris Elrod <elrodc@gmail.com>

* Update src/cartesianvindex.jl

Co-authored-by: Chris Elrod <elrodc@gmail.com>

* Update src/cartesianvindex.jl

Co-authored-by: Chris Elrod <elrodc@gmail.com>

* Update src/cartesianvindex.jl

Co-authored-by: Chris Elrod <elrodc@gmail.com>

* Update src/llvm_intrin/masks.jl

Co-authored-by: Chris Elrod <elrodc@gmail.com>

* Update src/llvm_intrin/intrin_funcs.jl

Co-authored-by: Chris Elrod <elrodc@gmail.com>

* Update src/llvm_intrin/masks.jl

Co-authored-by: Chris Elrod <elrodc@gmail.com>

* Use :Integer

* Update src/llvm_intrin/intrin_funcs.jl

Co-authored-by: Chris Elrod <elrodc@gmail.com>

* Use IntegerTypes more

Co-authored-by: Chris Elrod <elrodc@gmail.com>
diff --git a/src/alignment.jl b/src/alignment.jl
@@ -5,23 +5,23 @@ Return aligned memory address with minimum increment. `align` assumes `n` is a
 power of 2.
 """
 function align end
-@inline align(x::Integer) = (x + Int(register_size() - One())) & Int(-register_size())
+@inline align(x::Union{Integer,StaticInt}) = (x + Int(register_size() - One())) & Int(-register_size())
 @inline align(x::Ptr{T}, arg) where {T} =
   reinterpret(Ptr{T}, align(reinterpret(UInt, x), arg))
 @inline align(x::Ptr{T}) where {T} = reinterpret(Ptr{T}, align(reinterpret(UInt, x)))
-@inline align(x::Integer, n) = (nm1 = n - One(); (x + nm1) & -n)
-@inline align(x::Integer, ::StaticInt{N}) where {N} = (nm1 = N - 1; (x + nm1) & -N)
-@inline align(x::Integer, ::Type{T}) where {T} =
+@inline align(x::Union{Integer,StaticInt}, n) = (nm1 = n - One(); (x + nm1) & -n)
+@inline align(x::Union{Integer,StaticInt}, ::StaticInt{N}) where {N} = (nm1 = N - 1; (x + nm1) & -N)
+@inline align(x::Union{Integer,StaticInt}, ::Type{T}) where {T} =
   align(x, register_size() ÷ static_sizeof(T))
 
 # @generated align(::Val{L}, ::Type{T}) where {L,T} = align(L, T)
-aligntrunc(x::Integer, n) = x & -n
-aligntrunc(x::Integer) = aligntrunc(x, register_size())
-aligntrunc(x::Integer, ::Type{T}) where {T} = aligntrunc(x, register_size() ÷ sizeof(T))
-alignment(x::Integer, N = 64) = reinterpret(Int, x) % N
+aligntrunc(x::Union{Integer,StaticInt}, n) = x & -n
+aligntrunc(x::Union{Integer,StaticInt}) = aligntrunc(x, register_size())
+aligntrunc(x::Union{Integer,StaticInt}, ::Type{T}) where {T} = aligntrunc(x, register_size() ÷ sizeof(T))
+alignment(x::Union{Integer,StaticInt}, N = 64) = reinterpret(Int, x) % N
 
 function valloc(
-  N::Integer,
+  N::Union{Integer,StaticInt},
   ::Type{T} = Float64,
   a = max(register_size(), cache_linesize()),
 ) where {T}
diff --git a/src/cartesianvindex.jl b/src/cartesianvindex.jl
@@ -1,9 +1,9 @@
 
 struct NullStep end
-struct CartesianVIndex{N,T<:Tuple{Vararg{Union{Integer,NullStep},N}}} <:
+struct CartesianVIndex{N,T<:Tuple{Vararg{Union{Int,StaticInt,NullStep},N}}} <:
        Base.AbstractCartesianIndex{N}
   I::T
-  @inline CartesianVIndex(I::T) where {N,T<:Tuple{Vararg{Union{Integer,NullStep},N}}} =
+  @inline CartesianVIndex(I::T) where {N,T<:Tuple{Vararg{Union{Int,StaticInt,NullStep},N}}} =
     new{N,T}(I)
 end
 Base.length(::CartesianVIndex{N}) where {N} = N
@@ -19,7 +19,7 @@ _ndim(::Type{<:Base.AbstractCartesianIndex{N}}) where {N} = N
 # _ndim(::Type{<:AbstractArray{N}}) where {N} = N
 @generated function CartesianVIndex(
   I::T,
-) where {T<:Tuple{Vararg{Union{Integer,CartesianIndex,CartesianVIndex,NullStep}}}}
+) where {T<:Tuple{Vararg{Union{Int,StaticInt,CartesianIndex,CartesianVIndex,NullStep}}}}
   iexpr = Expr(:tuple)
   Tp = T.parameters
   q = Expr(:block)
@@ -45,7 +45,6 @@ _ndim(::Type{<:Base.AbstractCartesianIndex{N}}) where {N} = N
   )
 end
 
-# @inline Base.CartesianIndex(I::Tuple{Vararg{Union{Integer,CartesianIndex,CartesianVIndex,StaticInt}}}) = CartesianVIndex(I)
 
 @generated function _maybestaticfirst(a::Tuple{Vararg{Any,N}}) where {N}
   quote
diff --git a/src/llvm_intrin/intrin_funcs.jl b/src/llvm_intrin/intrin_funcs.jl
@@ -83,7 +83,7 @@ end
 #     %byte = zext i1 %actual to i8
 #     ret i8 %byte""", :Bool, :(Tuple{Bool}), "i8", ["i8"], [:b]))
 # end
-# @generated function expect(i::I, ::Val{N}) where {I <: Integer, N}
+# @generated function expect(i::I, ::Val{N}) where {I <: Union{Integer,StaticInt}, N}
 #     ityp = 'i' * string(8sizeof(I))
 #     llvmcall_expr("declare i1 @llvm.expect.$ityp($ityp, i1)", """
 #     %actual = call $ityp @llvm.expect.$ityp($ityp %0, $ityp $N)
@@ -115,13 +115,13 @@ if Base.libllvm_version ≥ v"12"
   @inline vmin(v1::Vec{W,<:Unsigned}, v2::Vec{W,<:Signed}) where {W} =
     vifelse(v1 < v2, v1, v2)
 else
-  @inline vmax(v1::Vec{W,<:Integer}, v2::Vec{W,<:Integer}) where {W} =
+  @inline vmax(v1::Vec{W,<:Union{Integer,StaticInt}}, v2::Vec{W,<:Union{Integer,StaticInt}}) where {W} =
     vifelse(v1 > v2, v1, v2)
-  @inline vmin(v1::Vec{W,<:Integer}, v2::Vec{W,<:Integer}) where {W} =
+  @inline vmin(v1::Vec{W,<:Union{Integer,StaticInt}}, v2::Vec{W,<:Union{Integer,StaticInt}}) where {W} =
     vifelse(v1 < v2, v1, v2)
 end
-@inline vmax_fast(v1::Vec{W,<:Integer}, v2::Vec{W,<:Integer}) where {W} = vmax(v1, v2)
-@inline vmin_fast(v1::Vec{W,<:Integer}, v2::Vec{W,<:Integer}) where {W} = vmin(v1, v2)
+@inline vmax_fast(v1::Vec{W,<:Union{Integer,StaticInt}}, v2::Vec{W,<:Union{Integer,StaticInt}}) where {W} = vmax(v1, v2)
+@inline vmin_fast(v1::Vec{W,<:Union{Integer,StaticInt}}, v2::Vec{W,<:Union{Integer,StaticInt}}) where {W} = vmin(v1, v2)
 @inline vmax(v1::Vec{W,Bool}, v2::Vec{W,Bool}) where {W} = vor(v1, v2)
 @inline vmin(v1::Vec{W,Bool}, v2::Vec{W,Bool}) where {W} = vand(v1, v2)
 
@@ -141,7 +141,7 @@ for (op, f) ∈ [
 end
 @inline vsqrt(v::AbstractSIMD{W,T}) where {W,T<:IntegerTypes} = vsqrt(float(v))
 @inline vsqrt(v::FloatingTypes) = Base.sqrt_llvm_fast(v)
-@inline vsqrt(v::Integer) = Base.sqrt_llvm_fast(float(v))
+@inline vsqrt(v::Union{Integer,StaticInt}) = Base.sqrt_llvm_fast(float(v))
 # @inline roundeven(v::VecUnroll) = VecUnroll(fmap(roundeven, getfield(v,:data)))
 # @generated function Base.round(::Type{Int64}, v1::Vec{W,T}) where {W, T <: Union{Float32,Float64}}
 #     llvmcall_expr("lrint", W, Int64, (W,), (T,), "")
@@ -366,7 +366,7 @@ for (f, fl) ∈
     @inline $f(s::IntegerTypesHW, v::Vec{W,<:IntegerTypesHW}) where {W} =
       $f(vbroadcast(Val{W}(), s), v)
     @inline $f(a::FloatingTypes, b::FloatingTypes) = Base.FastMath.$fl(a, b)
-    @inline $f(a::Integer, b::Integer) = Base.FastMath.$fl(a, b)
+    @inline $f(a::Union{Integer,StaticInt}, b::Union{Integer,StaticInt}) = Base.FastMath.$fl(a, b)
   end
 end
 
@@ -580,7 +580,7 @@ end
 # @inline ifelse_collapse_mirror(f::F, a, ::Tuple{}, x, ::Tuple{}) where {F} = a, x
 # @inline function ifelse_collapse_mirror(f::F, a, c::Tuple{T}, x, z::Tuple{T}) where {F,T}
 #   b = first(c); y = first(z)
-#   fxy = f(x,y)  
+#   fxy = f(x,y)
 #   ifelse(fxy, a, b), ifelse(fxy, x, y)
 # end
 # @inline function ifelse_collapse_mirror(f::F, a, c::Tuple, x, z::Tuple) where {F}
@@ -727,9 +727,9 @@ function count_zeros_func(W, I, op, tf = 1)
   rettypexpr = :(_Vec{$W,$I})
   llvmcall_expr(decl, instrs, rettypexpr, :(Tuple{$rettypexpr}), vtyp, [vtyp], [:(data(v))])
 end
-# @generated Base.abs(v::Vec{W,I}) where {W, I <: Integer} = count_zeros_func(W, I, "abs", 0)
-@generated vleading_zeros(v::Vec{W,I}) where {W,I<:Integer} = count_zeros_func(W, I, "ctlz")
-@generated vtrailing_zeros(v::Vec{W,I}) where {W,I<:Integer} =
+# @generated Base.abs(v::Vec{W,I}) where {W, I <: Union{Integer,StaticInt}} = count_zeros_func(W, I, "abs", 0)
+@generated vleading_zeros(v::Vec{W,I}) where {W,I<:IntegerTypesHW} = count_zeros_func(W, I, "ctlz")
+@generated vtrailing_zeros(v::Vec{W,I}) where {W,I<:IntegerTypesHW} =
   count_zeros_func(W, I, "cttz")
 
 
@@ -1128,7 +1128,7 @@ end
 @inline inv_approx(v::VecUnroll) = VecUnroll(fmap(inv_approx, getfield(v, :data)))
 
 @inline vinv_fast(v) = vinv(v)
-@inline vinv_fast(v::AbstractSIMD{<:Any,<:Integer}) = vinv_fast(float(v))
+@inline vinv_fast(v::AbstractSIMD{<:Any,<:Union{Integer,StaticInt}}) = vinv_fast(float(v))
 
 @static if (Sys.ARCH === :x86_64) || (Sys.ARCH === :i686)
 
@@ -1258,7 +1258,7 @@ end
   yₙ₊₁ = yₙ - f(yₙ)/f′(yₙ)
   f(yₙ) = 1/yₙ - x
   f′(yₙ) = -1/yₙ²
-  yₙ₊₁ = yₙ + (1/yₙ - x) * yₙ² = yₙ + yₙ - x * yₙ² = 2yₙ - x * yₙ² = yₙ * ( 2 - x * yₙ ) 
+  yₙ₊₁ = yₙ + (1/yₙ - x) * yₙ² = yₙ + yₙ - x * yₙ² = 2yₙ - x * yₙ² = yₙ * ( 2 - x * yₙ )
   yₙ₊₁ = yₙ * ( 2 - x * yₙ )
   """
   @inline function vinv_fast(v::AbstractSIMD{W,Float32}) where {W}
diff --git a/src/llvm_intrin/masks.jl b/src/llvm_intrin/masks.jl
@@ -322,7 +322,7 @@ end
 @generated max_mask(::Type{Mask{W,U}}) where {W,U} =
   EVLMask{W,U}(one(U) << W - one(U), W % UInt32)
 
-@generated function valrem(::Union{Val{W},StaticInt{W}}, l::T) where {W,T<:Integer}
+@generated function valrem(::Union{Val{W},StaticInt{W}}, l::T) where {W,T<:Union{Integer,StaticInt}}
   ex = ispow2(W) ? :(l & $(T(W - 1))) : Expr(:call, Base.urem_int, :l, T(W))
   Expr(:block, Expr(:meta, :inline), ex)
 end
@@ -338,7 +338,7 @@ end
 @generated bzhi(a::UInt32, b::UInt32) = bzhi_quote(32)
 @generated bzhi(a::UInt64, b::UInt64) = bzhi_quote(64)
 
-# @generated function _mask(::Union{Val{W},StaticInt{W}}, l::I, ::True) where {W,I<:Integer}
+# @generated function _mask(::Union{Val{W},StaticInt{W}}, l::I, ::True) where {W,I<:Union{Integer,StaticInt}}
 #   # if `has_opmask_registers()` then we can use bitmasks directly, so we create them via bittwiddling
 #   M = mask_type(W)
 #   quote # If the arch has opmask registers, we can generate a bitmask and then move it into the opmask register
@@ -347,7 +347,7 @@ end
 #     EVLMask{$W,$M}($(typemax(M)) >>> ($(M(8sizeof(M))-1) - evl), evl + one(evl))
 #   end
 # end
-@generated function _mask_bzhi(::Union{Val{W},StaticInt{W}}, l::I) where {W,I<:Integer}
+@generated function _mask_bzhi(::Union{Val{W},StaticInt{W}}, l::I) where {W,I<:Union{Integer,StaticInt}}
   U = mask_type_symbol(W)
   T = W > 32 ? :UInt64 : :UInt32
   quote
@@ -357,21 +357,21 @@ end
     EVLMask{$W,$U}(bzhi(-1 % $T, m) % $U, m)
   end
 end
-# @inline function _mask_bzhi(::Union{Val{W},StaticInt{W}}, l::I) where {W,I<:Integer}
+# @inline function _mask_bzhi(::Union{Val{W},StaticInt{W}}, l::I) where {W,I<:Union{Integer,StaticInt}}
 #   U = mask_type(StaticInt(W))
 #   # m = ((l) % UInt32) & ((W-1) % UInt32)
 #   m = valrem(StaticInt{W}(), l % UInt32)
 #   m = Core.ifelse((m % UInt8) == 0x00, W % UInt32, m)
 #   # m = Core.ifelse(zero(m) == m, -1 % UInt32, m)
 #   EVLMask{W,U}(bzhi(-1 % UInt32, m) % U, m)
 # end
-# @inline function _mask(::Union{Val{W},StaticInt{W}}, l::I, ::True) where {W,I<:Integer}
+# @inline function _mask(::Union{Val{W},StaticInt{W}}, l::I, ::True) where {W,I<:Union{Integer,StaticInt}}
 #   U = mask_type(StaticInt(W))
 #   m = ((l-one(l)) % UInt32) & ((W-1) % UInt32)
 #   m += one(m)
 #   EVLMask{W,U}(bzhi(-1 % UInt32, m) % U, m)
 # end
-# @generated function _mask(::Union{Val{W},StaticInt{W}}, l::I, ::True) where {W,I<:Integer}
+# @generated function _mask(::Union{Val{W},StaticInt{W}}, l::I, ::True) where {W,I<:Union{Integer,StaticInt}}
 #   M = mask_type_symbol(W)
 #   quote
 #     $(Expr(:meta,:inline))
@@ -426,16 +426,16 @@ end
   l::I,
   ::StaticInt{RS},
   ::True,
-) where {W,RS,I<:Integer} = mask_cmp_quote(W, RS, true)
+) where {W,RS,I<:Union{Integer,StaticInt}} = mask_cmp_quote(W, RS, true)
 @generated _mask_cmp(
   ::Union{Val{W},StaticInt{W}},
   l::I,
   ::StaticInt{RS},
   ::False,
-) where {W,RS,I<:Integer} = mask_cmp_quote(W, RS, false)
-@generated _mask(::Union{Val{W},StaticInt{W}}, l::I, ::True) where {W,I<:Integer} =
+) where {W,RS,I<:Union{Integer,StaticInt}} = mask_cmp_quote(W, RS, false)
+@generated _mask(::Union{Val{W},StaticInt{W}}, l::I, ::True) where {W,I<:Union{Integer,StaticInt}} =
   mask_shift_quote(W, true)
-@generated function _mask(::Union{Val{W},StaticInt{W}}, l::I, ::False) where {W,I<:Integer}
+@generated function _mask(::Union{Val{W},StaticInt{W}}, l::I, ::False) where {W,I<:Union{Integer,StaticInt}}
   # Otherwise, it's probably more efficient to use a comparison, as this will probably create some type that can be used directly for masked moves/blends/etc
   if W > 16
     Expr(
@@ -468,10 +468,10 @@ end
   _mask(StaticInt(W), L, has_feature(Val(:x86_64_avx512f)) & ge_one_fma(cpu_name()))
 @inline mask(::Union{Val{W},StaticInt{W}}, ::StaticInt{L}) where {W,L} =
   _mask(StaticInt(W), L, has_feature(Val(:x86_64_avx512f)) & ge_one_fma(cpu_name()))
-@inline mask(::Type{T}, l::Integer) where {T} =
+@inline mask(::Type{T}, l::Union{Integer,StaticInt}) where {T} =
   _mask(pick_vector_width(T), l, has_feature(Val(:x86_64_avx512f)) & ge_one_fma(cpu_name()))
 
-# @generated function masktable(::Union{Val{W},StaticInt{W}}, rem::Integer) where {W}
+# @generated function masktable(::Union{Val{W},StaticInt{W}}, rem::Union{Integer,StaticInt}) where {W}
 #     masks = Expr(:tuple)
 #     for w ∈ 0:W-1
 #         push!(masks.args, data(mask(Val(W), w == 0 ? W : w)))
@@ -507,7 +507,7 @@ end
 # @inline tounsigned(m::Vec{W,Bool}) where {W} = getfield(tomask(m), :u)
 @inline tounsigned(v) = getfield(tomask(v), :u)
 
-@generated function vrem(m::Mask{W,U}, ::Type{I}) where {W,U,I<:Integer}
+@generated function vrem(m::Mask{W,U}, ::Type{I}) where {W,U,I<:IntegerTypesHW}
   bits = 8sizeof(I)
   instrs = String[]
   truncate_mask!(instrs, '0', W, 0)
@@ -523,7 +523,7 @@ end
 Vec(m::Mask{W}) where {W} = m % int_type(Val{W}())
 
 # @inline getindexzerobased(m::Mask, i) = (getfield(m, :u) >>> i) % Bool
-# @inline function extractelement(m::Mask{W}, i::Integer) where {W}
+# @inline function extractelement(m::Mask{W}, i::Union{Integer,StaticInt}) where {W}
 #     @boundscheck i > W && throw(BoundsError(m, i))
 #     getindexzerobased(m, i)
 # end
@@ -607,7 +607,7 @@ for (f, cond) ∈ [(:veq, "eq"), (:vne, "ne")]
   @eval @generated function $f(
     v1::Vec{W,T1},
     v2::Vec{W,T2},
-  ) where {W,T1<:Integer,T2<:Integer}
+  ) where {W,T1<:IntegerTypesHW,T2<:IntegerTypesHW}
     if sizeof(T1) != sizeof(T2)
       return Expr(
         :block,
@@ -782,8 +782,8 @@ end
 @inline Base.flipsign(x::AbstractSIMD, y::Real) = ifelse(y > zero(y), x, -x)
 @inline Base.flipsign(x::Real, y::AbstractSIMD) = ifelse(y > zero(y), x, -x)
 @inline Base.flipsign(x::Signed, y::AbstractSIMD) = ifelse(y > zero(y), x, -x)
-@inline Base.isodd(x::AbstractSIMD{W,T}) where {W,T<:Integer} = (x & one(T)) != zero(T)
-@inline Base.iseven(x::AbstractSIMD{W,T}) where {W,T<:Integer} = (x & one(T)) == zero(T)
+@inline Base.isodd(x::AbstractSIMD{W,T}) where {W,T<:Union{Integer,StaticInt}} = (x & one(T)) != zero(T)
+@inline Base.iseven(x::AbstractSIMD{W,T}) where {W,T<:Union{Integer,StaticInt}} = (x & one(T)) == zero(T)
 
 @generated function vifelse(m::Vec{W,Bool}, v1::Vec{W,T}, v2::Vec{W,T}) where {W,T}
   typ = LLVM_TYPES[T]
@@ -1024,4 +1024,3 @@ end
 @inline Base.min(x::AbstractMask, y::AbstractMask) = x & y
 @inline Base.FastMath.max_fast(x::AbstractMask, y::AbstractMask) = x | y
 @inline Base.FastMath.min_fast(x::AbstractMask, y::AbstractMask) = x & y
-
diff --git a/src/llvm_intrin/memory_addr.jl b/src/llvm_intrin/memory_addr.jl
@@ -953,7 +953,7 @@ end
 
 @inline function _vload_scalar(
   ptr::Ptr{Bit},
-  i::Integer,
+  i::Union{Integer,StaticInt},
   ::A,
   ::StaticInt{RS},
 ) where {RS,A<:StaticBool}
diff --git a/src/llvm_intrin/unary_ops.jl b/src/llvm_intrin/unary_ops.jl
@@ -31,8 +31,8 @@ end
 @inline vabs(v::AbstractSIMD{W,<:Signed}) where {W} = ifelse(v > 0, v, -v)
 
 @inline vround(v) = round(v)
-@inline vround(v::AbstractSIMD{W,<:Integer}) where {W} = v
-@inline vround(v::AbstractSIMD{W,<:Integer}, ::RoundingMode) where {W} = v
+@inline vround(v::AbstractSIMD{W,<:Union{Integer,StaticInt}}) where {W} = v
+@inline vround(v::AbstractSIMD{W,<:Union{Integer,StaticInt}}, ::RoundingMode) where {W} = v
 
 
 function bswap_quote(W::Int, T::Symbol, st::Int)::Expr
diff --git a/src/promotion.jl b/src/promotion.jl
@@ -29,7 +29,7 @@ function _ff_promote_rule(
   ::Type{T1},
   ::Type{T2},
   ::Val{W},
-) where {T1<:Integer,T2<:Integer,W}
+) where {T1<:Union{Integer,StaticInt},T2<:Union{Integer,StaticInt},W}
   T_canon = promote_type(T1, T2)
   __ff_maybe_promote_int(
     lt(pick_vector_width(T_canon), StaticInt{W}()),
@@ -38,7 +38,7 @@ function _ff_promote_rule(
     Val{W}(),
   )
 end
-ff_promote_rule(::Type{T1}, ::Type{T2}, ::Val{W}) where {T1<:Integer,T2<:Integer,W} =
+ff_promote_rule(::Type{T1}, ::Type{T2}, ::Val{W}) where {T1<:Union{Integer,StaticInt},T2<:Union{Integer,StaticInt},W} =
   _ff_promote_rule(T1, T2, Val{W}())
 ff_promote_rule(
   ::Type{T1},
@@ -107,11 +107,11 @@ issigned(::Type{T}) where {T} = nothing
  Promote, favoring <:Signed or <:Unsigned of first arg.
 """
 @inline promote_div(
-  x::Union{Integer,AbstractSIMD{<:Any,<:Integer}},
-  y::Union{Integer,AbstractSIMD{<:Any,<:Integer}},
+  x::Union{Integer,StaticInt,AbstractSIMD{<:Any,<:Union{Integer,StaticInt}}},
+  y::Union{Integer,StaticInt,AbstractSIMD{<:Any,<:Union{Integer,StaticInt}}},
 ) = promote_div(x, y, issigned(x))
 @inline promote_div(x, y) = promote(x, y)
-@inline promote_div(x, y, ::Nothing) = promote(x, y) # for Integers that are neither Signed or Unsigned, e.g. Bool
+@inline promote_div(x, y, ::Nothing) = promote(x, y) # for integer types that are neither Signed nor Unsigned, e.g. Bool
 @inline function promote_div(x::T1, y::T2, ::True) where {T1,T2}
   T = promote_type(T1, T2)
   signed(x % T), signed(y % T)
diff --git a/src/ranges.jl b/src/ranges.jl
@@ -35,7 +35,7 @@ F - static multiplicative factor
   ::Val{O},
   ::Val{F},
   ::StaticInt{SIRS},
-) where {W,I<:Integer,O,F,SIRS}
+) where {W,I<:Union{Integer,StaticInt},O,F,SIRS}
   isone(W) && return Expr(:block, Expr(:meta, :inline), :(Base.add_int(i, $(O % I))))
   bytes = pick_integer_bytes(W, sizeof(I), SIRS)
   bits = 8bytes
@@ -55,7 +55,7 @@ F - static multiplicative factor
     Vec($LLVMCALL($instrs, _Vec{$W,$jtypesym}, Tuple{$jtypesym}, $iexpr))
   end
 end
-@inline function vrangeincr(::Val{W}, i::I, ::Val{O}, ::Val{F}) where {W,I<:Integer,O,F}
+@inline function vrangeincr(::Val{W}, i::I, ::Val{O}, ::Val{F}) where {W,I<:Union{Integer,StaticInt},O,F}
   _vrangeincr(Val{W}(), i, Val{O}(), Val{F}(), simd_integer_register_size())
 end
 @generated function vrangeincr(
diff --git a/src/special/misc.jl b/src/special/misc.jl
@@ -82,7 +82,7 @@ end
   exp2(Base.FastMath.log2_fast(v) * x)
 @inline Base.literal_pow(::typeof(^), x::AbstractSIMD, ::Val{N}) where {N} =
   pow_by_square(x, StaticInt(N))
-# @inline relu(x) = (y = zero(x); ifelse(x > y, x, y)) 
+# @inline relu(x) = (y = zero(x); ifelse(x > y, x, y))
 @inline relu(x) = (y = zero(x); ifelse(x < y, y, x))
 
 Base.sign(v::AbstractSIMD) = ifelse(v > 0, one(v), -one(v))
@@ -95,12 +95,12 @@ Base.sign(v::AbstractSIMD) = ifelse(v > 0, one(v), -one(v))
   x::AbstractSIMD{W,T},
   y::AbstractSIMD{W,T},
   ::RoundingMode{:Down},
-) where {W,T<:Integer}
+) where {W,T<:IntegerTypes}
   d = div(x, y)
   d - (signbit(x ⊻ y) & (d * y != x))
 end
 
-@inline Base.mod(x::AbstractSIMD{W,T}, y::AbstractSIMD{W,T}) where {W,T<:Integer} =
+@inline Base.mod(x::AbstractSIMD{W,T}, y::AbstractSIMD{W,T}) where {W,T<:IntegerTypes} =
   ifelse(y == -1, zero(x), x - fld(x, y) * y)
 
 @inline Base.mod(x::AbstractSIMD{W,T}, y::AbstractSIMD{W,T}) where {W,T<:Unsigned} =
@@ -135,7 +135,7 @@ end
   signed(mod(_x, _y))
 end
 
-@inline Base.mod(i::AbstractSIMD{<:Any,<:Integer}, r::AbstractUnitRange{<:Integer}) =
+@inline Base.mod(i::AbstractSIMD{<:Any,<:IntegerTypes}, r::AbstractUnitRange{<:IntegerTypes}) =
   mod(i - first(r), length(r)) + first(r)
 
 @inline Base.mod(x::AbstractSIMD, y::NativeTypes) = mod(promote_div(x, y)...)
@@ -154,7 +154,7 @@ end
 @inline Base.FastMath.hypot_fast(x::AbstractSIMD, y::AbstractSIMD) =
   sqrt(Base.FastMath.add_fast(Base.FastMath.mul_fast(x, x), Base.FastMath.mul_fast(y, y)))
 
-@inline Base.clamp(x::AbstractSIMD{<:Any,<:Integer}, r::AbstractUnitRange{<:Integer}) =
+@inline Base.clamp(x::AbstractSIMD{<:Any,<:IntegerTypes}, r::AbstractUnitRange{<:IntegerTypes}) =
   clamp(x, first(r), last(r))
 
 @inline function Base.gcd(
diff --git a/src/strided_pointers/stridedpointers.jl b/src/strided_pointers/stridedpointers.jl