 import torch.nn as nn
 from flashinfer.norm import (
     fused_add_rmsnorm,
-    # gemma_fused_add_rmsnorm,
-    # gemma_rmsnorm,
     rmsnorm,
 )
 from scratchpad.model_executor.custom_op import CustomOp
@@ -50,45 +48,4 @@ def forward_native(
         if residual is None:
             return x
         else:
-            return x, residual
-
-
-# class GemmaRMSNorm(CustomOp):
-#     def __init__(
-#         self,
-#         hidden_size: int,
-#         eps: float = 1e-6,
-#     ) -> None:
-#         super().__init__()
-#         self.weight = nn.Parameter(torch.zeros(hidden_size))
-#         self.variance_epsilon = eps
-
-#     def forward_native(
-#         self,
-#         x: torch.Tensor,
-#         residual: Optional[torch.Tensor] = None,
-#     ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
-#         orig_dtype = x.dtype
-#         if residual is not None:
-#             x = x + residual
-#             residual = x
-
-#         x = x.float()
-#         variance = x.pow(2).mean(dim=-1, keepdim=True)
-#         x = x * torch.rsqrt(variance + self.variance_epsilon)
-#         x = x * (1.0 + self.weight.float())
-#         x = x.to(orig_dtype)
-#         return x if residual is None else (x, residual)
-
-#     def forward_cuda(
-#         self,
-#         x: torch.Tensor,
-#         residual: Optional[torch.Tensor] = None,
-#     ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
-#         if residual is not None:
-#             gemma_fused_add_rmsnorm(
-#                 x, residual, self.weight.data, self.variance_epsilon
-#             )
-#             return x, residual
-#         out = gemma_rmsnorm(x, self.weight.data, self.variance_epsilon)
-#         return out
+            return x, residual
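For context, the class kept in this file presumably dispatches to the flashinfer kernels that remain imported (`fused_add_rmsnorm`, `rmsnorm`). The sketch below is an assumption modeled on the removed GemmaRMSNorm block, not this file's actual code; the class name `RMSNormSketch` and the ones-initialized weight are illustrative only.

```python
# Minimal sketch (assumed, not the file's actual code): a standard RMSNorm-style
# module that calls flashinfer's rmsnorm / fused_add_rmsnorm kernels, following
# the same call pattern as the removed gemma_* variants in the diff above.
from typing import Optional, Tuple, Union

import torch
import torch.nn as nn
from flashinfer.norm import fused_add_rmsnorm, rmsnorm


class RMSNormSketch(nn.Module):  # hypothetical stand-in for the CustomOp subclass
    def __init__(self, hidden_size: int, eps: float = 1e-6) -> None:
        super().__init__()
        # Standard RMSNorm scales by `weight` directly, so it is initialized to ones
        # (unlike the Gemma variant, which stores weight - 1 and uses zeros).
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.variance_epsilon = eps

    def forward_cuda(
        self,
        x: torch.Tensor,
        residual: Optional[torch.Tensor] = None,
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        if residual is not None:
            # The fused kernel updates x and residual in place, mirroring the
            # removed gemma_fused_add_rmsnorm usage.
            fused_add_rmsnorm(x, residual, self.weight.data, self.variance_epsilon)
            return x, residual
        return rmsnorm(x, self.weight.data, self.variance_epsilon)
```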