
Commit 3622864

cleanup
1 parent 447768f commit 3622864

3 files changed: +38 -42 lines changed

README.md

Lines changed: 2 additions & 1 deletion
@@ -64,10 +64,11 @@ $ python setup.py test
 
 ## Todo
 
+- [x] move xi and xj separate project and sum logic into Conv class
+
 - [ ] figure out DTP heuristic
 - [ ] move self interacting key / value production into Conv, fix no pooling in conv with self interaction
 - [ ] start moving some spherical harmonic stuff to cpp or nim
-- [ ] move xi and xj separate project and sum logic into Conv class
 
 ## Citations
 

equiformer_pytorch/equiformer_pytorch.py

Lines changed: 35 additions & 40 deletions
@@ -179,12 +179,16 @@ def forward(self, x):
         return output
 
 @beartype
-class Conv(nn.Module):
+class TP(nn.Module):
+    """ 'Tensor Product' - in the equivariant sense """
+
     def __init__(
         self,
         fiber_in: Tuple[int, ...],
         fiber_out: Tuple[int, ...],
         self_interaction = True,
+        project_xi_xj = True,   # whether to project xi and xj and then sum, as in paper
+        project_out = True,     # whether to do a project out after the "tensor product"
         pool = True,
         edge_dim = 0,
         splits = 4
@@ -194,27 +198,33 @@ def __init__(
         self.fiber_out = fiber_out
         self.edge_dim = edge_dim
         self.self_interaction = self_interaction
+        self.pool = pool
+        self.splits = splits # for splitting the computation of kernel and basis, to reduce peak memory usage
 
-        self.kernel_unary = nn.ModuleDict()
+        self.project_xi_xj = project_xi_xj
+        if project_xi_xj:
+            self.to_xi = Linear(fiber_in, fiber_in)
+            self.to_xj = Linear(fiber_in, fiber_in)
 
-        self.splits = splits # for splitting the computation of kernel and basis, to reduce peak memory usage
+        self.kernel_unary = nn.ModuleDict()
 
         for (di, mi), (do, mo) in fiber_product(self.fiber_in, self.fiber_out):
-            self.kernel_unary[f'({di},{do})'] = PairwiseConv(di, mi, do, mo, edge_dim = edge_dim)
-
-        self.pool = pool
+            self.kernel_unary[f'({di},{do})'] = PairwiseTP(di, mi, do, mo, edge_dim = edge_dim)
 
         if self_interaction:
             assert self.pool, 'must pool edges if followed with self interaction'
             self.self_interact = Linear(fiber_in, fiber_out)
 
+        self.project_out = project_out
+        if project_out:
+            self.to_out = Linear(fiber_out, fiber_out)
+
     def forward(
         self,
         inp,
         edge_info: EdgeInfo,
         rel_dist = None,
-        basis = None,
-        neighbors = None
+        basis = None
     ):
         splits = self.splits
         neighbor_indices, neighbor_masks, edges = edge_info
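A minimal construction sketch for the renamed module, based on the signature above. It assumes TP is importable from equiformer_pytorch/equiformer_pytorch.py; the fiber sizes, edge_dim and splits values are illustrative, not taken from the commit:

    # hypothetical usage sketch; constructor arguments mirror the diff above
    from equiformer_pytorch.equiformer_pytorch import TP

    tp = TP(
        fiber_in = (16, 8),      # e.g. 16 channels of degree 0, 8 channels of degree 1 (made up)
        fiber_out = (16, 8),
        self_interaction = True,
        project_xi_xj = True,    # project xi and xj separately, then sum (now handled inside TP)
        project_out = True,      # extra Linear applied to the outputs after the "tensor product"
        edge_dim = 4,
        splits = 4
    )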
@@ -231,8 +241,10 @@ def forward(
 
         # neighbors
 
-        neighbors_separate_embed = exists(neighbors)
-        neighbors = default(neighbors, inp)
+        if self.project_xi_xj:
+            source, target = self.to_xi(inp), self.to_xj(inp)
+        else:
+            source, target = inp, inp
 
         # go through every permutation of input degree type to output degree type
 
@@ -242,11 +254,11 @@
             for degree_in, m_in in enumerate(self.fiber_in):
                 etype = f'({degree_in},{degree_out})'
 
-                xi, xj = inp[degree_in], neighbors[degree_in]
+                xi, xj = source[degree_in], target[degree_in]
 
                 x = batched_index_select(xj, neighbor_indices, dim = 1)
 
-                if neighbors_separate_embed:
+                if self.project_xi_xj:
                     xi = rearrange(xi, 'b i d m -> b i 1 d m')
                     x = x + xi
 
@@ -280,6 +292,9 @@ def forward(
             self_interact_out = self.self_interact(inp)
             outputs = residual_fn(outputs, self_interact_out)
 
+        if self.project_out:
+            outputs = self.to_out(outputs)
+
         return outputs
 
 class RadialFunc(nn.Module):
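The new project_out option applies one more degree-wise Linear to the outputs. As a rough analogy only (this is not the repo's Linear implementation), an equivariant "project out" mixes channels within each degree while leaving the spherical axis untouched:

    import torch

    # hypothetical per-degree outputs: degree -> (batch, nodes, channels, 2 * degree + 1)
    outputs = {0: torch.randn(2, 8, 16, 1), 1: torch.randn(2, 8, 16, 3)}
    weights = {deg: torch.randn(16, 16) for deg in outputs}       # one channel-mixing matrix per degree

    projected = {
        deg: torch.einsum('b n d m, e d -> b n e m', t, weights[deg])  # acts on channels only
        for deg, t in outputs.items()
    }
    print({deg: t.shape for deg, t in projected.items()})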
@@ -311,7 +326,7 @@ def forward(self, x):
         y = self.net(x)
         return rearrange(y, '... (o i f) -> ... o 1 i 1 f', i = self.in_dim, o = self.out_dim)
 
-class PairwiseConv(nn.Module):
+class PairwiseTP(nn.Module):
     def __init__(
         self,
         degree_in,
@@ -430,11 +445,7 @@ def __init__(
         self.prenorm = Norm(fiber)
 
         self.to_q = Linear(fiber, hidden_fiber)
-
-        self.to_xi = Linear(fiber, fiber)
-        self.to_xj = Linear(fiber, fiber)
-
-        self.to_kv = Conv(fiber, kv_hidden_fiber, edge_dim = edge_dim, pool = False, self_interaction = False, splits = splits)
+        self.to_kv = TP(fiber, kv_hidden_fiber, edge_dim = edge_dim, pool = False, self_interaction = False, splits = splits)
         self.to_self_kv = Linear(fiber, kv_hidden_fiber) if attend_self else None
 
         self.to_out = Linear(hidden_fiber, fiber)
@@ -459,11 +470,8 @@ def forward(
 
         queries = self.to_q(features)
 
-        xi, xj = self.to_xi(features), self.to_xj(features)
-
         keyvalues = self.to_kv(
-            xi,
-            neighbors = xj,
+            features,
             edge_info = edge_info,
             rel_dist = rel_dist,
             basis = basis
@@ -557,16 +565,9 @@ def __init__(
         intermediate_fiber = tuple_set_at_index(value_hidden_fiber, 0, sum(attn_hidden_dims) + type0_dim + htype_dims)
         self.intermediate_type0_split = [*attn_hidden_dims, type0_dim + htype_dims]
 
-        # linear project xi and xj separately
-
-        self.to_xi = Linear(fiber, fiber)
-        self.to_xj = Linear(fiber, fiber)
-
         # main branch tensor product
 
-        self.to_attn_and_v = Conv(fiber, intermediate_fiber, edge_dim = edge_dim, pool = False, self_interaction = False, splits = splits)
-
-        self.post_to_attn_and_v_linear = Linear(intermediate_fiber, intermediate_fiber)
+        self.to_attn_and_v = TP(fiber, intermediate_fiber, edge_dim = edge_dim, pool = False, self_interaction = False, splits = splits)
 
         # non-linear projection of attention branch into the attention logits
 
@@ -601,19 +602,13 @@ def forward(
 
         features = self.prenorm(features)
 
-        xi = self.to_xi(features)
-        xj = self.to_xj(features)
-
         intermediate = self.to_attn_and_v(
-            xi,
-            neighbors = xj,
+            features,
            edge_info = edge_info,
            rel_dist = rel_dist,
            basis = basis
        )
 
-        intermediate = self.post_to_attn_and_v_linear(intermediate)
-
         *attn_branch_type0, value_branch_type0 = intermediate[0].split(self.intermediate_type0_split, dim = -2)
 
         intermediate[0] = value_branch_type0
@@ -739,11 +734,11 @@ def __init__(
 
         # define fibers and dimensionality
 
-        conv_kwargs = dict(edge_dim = edge_dim, splits = splits)
+        tp_kwargs = dict(edge_dim = edge_dim, splits = splits)
 
         # main network
 
-        self.conv_in = Conv(self.dim_in, self.dim, **conv_kwargs)
+        self.tp_in = TP(self.dim_in, self.dim, **tp_kwargs)
 
         # trunk
 
@@ -879,7 +874,7 @@ def forward(
 
         # project in
 
-        x = self.conv_in(x, edge_info, rel_dist = neighbor_rel_dist, basis = basis)
+        x = self.tp_in(x, edge_info, rel_dist = neighbor_rel_dist, basis = basis)
 
         # transformer layers
 
equiformer_pytorch/version.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
-__version__ = '0.0.17'
+__version__ = '0.0.21'
 
 __cuda_pkg_name__ = f'equiformer_pytorch_cuda_{__version__.replace(".", "_")}'
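For reference, the version bump also changes the derived CUDA extension package name; evaluating the line above gives:

    __version__ = '0.0.21'
    __cuda_pkg_name__ = f'equiformer_pytorch_cuda_{__version__.replace(".", "_")}'
    print(__cuda_pkg_name__)  # equiformer_pytorch_cuda_0_0_21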
