Made lattice/parity problems harder. Minor comment improvements on human_eval.py. Added puzzles.json to the repo

akalai · akalai · commit 6d76d8154a29 · 2021-10-19T10:21:12.000-04:00
diff --git a/generators/human_eval.py b/generators/human_eval.py
@@ -7,47 +7,20 @@
 from typing import List
 
 """
-Some that came out especially nicely as puzzles:
-ParenthesesPermutation
-Derivative
-Frac/ClosestInteger
-HeronTriangle
-RomanNumerals
-ClosestPalindrome
-WildSort
-Intersperse
-SimplifyProductFraction
-Fib4
-MinSquaredDeviation
-DiffChars
-RotateString
-EvaluateOperators
-Grader
-Median
-TripleZeroSum
-PrimeFib
-
-Some that weren't such natural puzzles:
-CircularShiftNum
-ReplaceMe
-MinSubArraySum
-Buckets
-OddEvenSum
-FindStrangeSum
-EvenSqure
-StrongestExtension
-HungryRabbits
-ReverseCase
-MatchBrackets
-ListTotal
-BelowThreshold
-RemoveVowels
-"""
+Some came out especially nicely as puzzles:
+ParenthesesPermutation, Derivative, Frac, HeronTriangle, RomanNumerals, ClosestPalindrome, WildSort, Intersperse,
+SimplifyProductFraction, Fib4, DiffChars, RotateString, EvaluateOperators, Grader, TripleZeroSum, PrimeFib
 
+Some weren't such natural puzzles:
+CircularShiftNum, ReplaceMe, MinSubArraySum, Buckets, OddEvenSum, FindStrangeSum, EvenSqure, StrongestExtension,
+HungryRabbits, ReverseCase, MatchBrackets, ListTotal, BelowThreshold, RemoveVowels
 
-# See https://github.com/microsoft/PythonProgrammingPuzzles/wiki/How-to-add-a-puzzle to learn about adding puzzles
+In many cases, the original problem wasn't naturally a puzzle but it inspired a nice loosely-related puzzle:
+ZobristCollision, EvenBetween, MinSquaredDeviation, Median
+"""
 
 
+# See https://github.com/microsoft/PythonProgrammingPuzzles/wiki/How-to-add-a-puzzle to learn about adding puzzles
 
 class FindCloseElements(PuzzleGenerator):
     """Inspired by [HumanEval](https://github.com/openai/human-eval) \\#0"""
@@ -163,10 +136,10 @@ def sat(firsts: List[int], balances=[[2, 7, -2, 4, 3, -15, 10, -45, 3], [3, 4, -
         Given a list of numbers which represent bank deposits and withdrawals, find the *first* negative balance.
 
         Sample Input:
-        [12, -5, 3, -99, 14, 88, -99]
+        [[12, -5, 3, -99, 14, 88, -99], [-1, 2, 5]]
 
         Sample Output:
-        -89
+        [-89, -1]
         """
         for i, bals in enumerate(balances):
             total = 0
@@ -1716,7 +1689,8 @@ def gen_random(self):
 
 class Median(PuzzleGenerator):
     """
-    One definition of the median is a number that minimizes the sum of absolute deviations.
+    One definition of the median is a number that minimizes the sum of absolute deviations. When there are an
+    even number of items, there is an interval of valid solutions.
 
     Inspired by [HumanEval](https://github.com/openai/human-eval) \\#47
     """
@@ -4018,8 +3992,11 @@ class ParenthesesPermutation(PuzzleGenerator):
     """
 
     @staticmethod
-    def sat(perm: str,
-            s="))(  )()()() )))(( ))))((( )))))(((( ))))))))((((((( ))))))((((( )))))))(((((( )))))))))(((((((  (((((((((("):
+    def sat(
+            perm: str,
+            s="))(  )()()() )))(( ))))((( )))))(((( ))))))))((((((( ))))))((((( " +
+              ")))))))(((((( )))))))))(((((((  (((((((((("
+    ):
         """The string s consists of groups of parentheses separated by spaces.
         Permute the groups such that the parentheses match.
 
@@ -5391,7 +5368,11 @@ def gen_random(self):
 
 
 class ZobristCollision(PuzzleGenerator):
-    """Inspired by [HumanEval](https://github.com/openai/human-eval) \\#162"""
+    """Inspired by [HumanEval](https://github.com/openai/human-eval) \\#162
+
+    The original problem was to compute an MD5 hash. This puzzle is a problem in the space of hashing, but of a
+    different nature.
+    """
 
     @staticmethod
     def sat(positions: List[List[int]]):
@@ -5433,7 +5414,13 @@ def zobrist(pos):
 
 
 class EvenBetween(PuzzleGenerator):
-    """Inspired by [HumanEval](https://github.com/openai/human-eval) \\#163"""
+    """Inspired by [HumanEval](https://github.com/openai/human-eval) \\#163
+
+    The original problem was the trivial task of listing the even single-digit numbers between two numbers:
+    `a=2, b=12` => `[4, 6, 8]`. In this puzzle, we consider the string of even numbers formed when counting from
+    `a` to `b`, e.g., `"1618202224262830"` when counting from `15` to `30`. The puzzle is, given such a string,
+    find `a` and `b`.
+    """
 
     @staticmethod
     def sat(ab: List[int], s="3298832990329923299432996329983300033002"):
@@ -5468,5 +5455,3 @@ def gen_random(self):
 
 if __name__ == "__main__":
     PuzzleGenerator.debug_problems()
-
-
diff --git a/generators/lattices.py b/generators/lattices.py
@@ -18,18 +18,37 @@ class LearnParity(PuzzleGenerator):
     The vectors are encoded as binary integers for succinctness.
     """
 
+    # vecs below generated by LearnParity.rand_parity_problem(random.Random(28562407), d=63)
+
     @staticmethod
-    def sat(inds: List[int], vecs=[169, 203, 409, 50, 37, 479, 370, 133, 53, 159, 161, 367, 474, 107, 82, 447, 385]):
+    def sat(inds: List[int], vecs=[8543342634111025532, 8335192666369313368, 2359039407982105779, 4172548441791366513,
+                                   1256349095522986569, 3754463859322679595, 1562879970152915618, 1933016518061876369,
+                                   5920060919607788629, 8545759471656960221, 2934241949774725291, 559495833580308526,
+                                   5239436672544732707, 5865707252111994906, 8310678944230832071, 4595527784831581592,
+                                   4348871153851862010, 5198370132175169882, 3748480974791545460, 1215135748294622536,
+                                   4321487173746421746, 9012812639700145153, 588387599697000986, 5003829835901037543,
+                                   7754881381173342129, 2635789994388296837, 3222773777603033590, 5790284924977099989,
+                                   7540575369379211274, 7898971930608516039, 27260728996582582, 1792453914477410383,
+                                   8726418386455953809, 9193001185022172125, 3515388340741601364, 6217726337930929836,
+                                   1038687698871580494, 1892601486162604802, 3633356355444530940, 108334555669330693,
+                                   1955821183884414243, 5681081121990060330, 5791800194327455183, 8459367068223249929,
+                                   4271428016720060690, 913733008909519396, 2233236350093301187, 6538503022239131288,
+                                   5292485269677307644, 4615671355181378169, 2605305508625596241, 4954529961471509975,
+                                   2312963580097644831, 888555840551788245, 4152336321587083789, 8978251650218883651,
+                                   2567641184250287470, 2168893575221172018, 4358821646257958779, 3102433300308778243,
+                                   4185793889128296420, 6687096428156463254, 4143873353280484310, 8454616559174688585,
+                                   6589014033410725016, 5903549622062684554, 2388718494916838667, 8850145667696469408,
+                                   5068285804151890745, 2981241929741282230, 79408177335937724, 1711542430102927280]):
         """
-        Parity learning: Given binary vectors in a subspace, find the secret set $S$ of indices such that:
-        $$sum_{i \in S} x_i = 1 (mod 2)$$
+        Parity learning: Given binary vectors in a subspace, find the secret set S of indices such that:
+        $\\sum_{i \\in S} x_i = 1 (mod 2)$
         """
         return all(sum((v >> i) & 1 for i in inds) % 2 == 1 for v in vecs)
 
     @staticmethod
     def sol(vecs):
         # Gaussian elimination
-        d = 0 # decode vectors into arrays
+        d = 0  # decode vectors into arrays
         m = max(vecs)
         while m:
             m >>= 1
@@ -55,16 +74,23 @@ def sol(vecs):
 
         return [i for i in range(d) if pool[i][-1]]
 
-    def gen_random(self):
-        d = self.random.randrange(2, self.random.choice([5, 10, 20, 100]))
+    @staticmethod
+    def rand_parity_problem(rand, d=63):
         secret = None
         while not secret:
-            secret = [i for i in range(d) if self.random.randrange(2)]
-        num_vecs = self.random.randrange(d, 5 * d)
-        vecs = [[self.random.randrange(2) for _ in range(d)] for i in range(num_vecs)]
+            secret = [i for i in range(d) if rand.randrange(2)]
+        num_vecs = d + 9
+        vecs = [[rand.randrange(2) for _ in range(d)] for i in range(num_vecs)]
         for v in vecs:
             v[secret[0]] = (1 + sum([v[i] for i in secret[1:]])) % 2
-        vecs =  [sum(1 << i for i, b in enumerate(v) if b) for v in vecs] # encode into ints
+        vecs = [sum(1 << i for i, b in enumerate(v) if b) for v in vecs]  # encode into ints
+        return vecs
+
+    def gen_random(self):
+        vecs = self.rand_parity_problem(
+            self.random,
+            d=self.random.randrange(2, self.random.choice([5, 10, 20, 100]))
+        )
         self.add(dict(vecs=vecs))
 
 
@@ -75,10 +101,44 @@ class LearnParityWithNoise(PuzzleGenerator):
     [Parity learning problem](https://en.wikipedia.org/w/index.php?title=Parity_learning)
     runs in time $2^(d/(log d))$"""
 
+    # vecs below generated by LearnParityWithNoise.rand_parity_problem(random.Random(852352407), d=63)
+
     multiplier = 40  # hard puzzle, takes longer to test
 
     @staticmethod
-    def sat(inds: List[int], vecs=[26, 5, 32, 3, 15, 18, 31, 13, 24, 25, 34, 5, 15, 24, 16, 13, 0, 27, 37]):
+    def sat(inds: List[int], vecs=[2874444459419665109, 3571416480966091062, 3627516422625241827, 2417762213996395207,
+                                   4371357242721531635, 1396026910505373292, 6671557086560014752, 9066082518122683098,
+                                   5240053591369828114, 8556210480838058892, 7302977584273736381, 8938278934736014411,
+                                   4398671200512032996, 6147375266514044469, 6609538006889421793, 2297823643430705118,
+                                   7583979108118079257, 2498392101379258437, 7893501751515236283, 2027235323873165116,
+                                   925357965000140266, 9009345166609418406, 5689450111800001849, 2079746314404416253,
+                                   4228649029862868917, 5819371323838727219, 102386757609774316, 5480808186035115654,
+                                   3001738569073502536, 9059061077086189682, 681271298018419415, 5616731111115463763,
+                                   2722737236861682531, 4918075690687573998, 7125583379861998376, 7968096465923567867,
+                                   898679944061110348, 1140358409311167922, 6077294650144352445, 587776882127248609,
+                                   2018954969823094844, 1618480274277140739, 8884189689498565225, 4084721521520724931,
+                                   4718438135662169666, 8411612063174086200, 8726374275365985960, 3135872851883336005,
+                                   1091802941995014823, 4944178741300545327, 6970959994566965947, 2911632933598497473,
+                                   8638215954009823387, 7438975146059987571, 3486356869336916018, 4935404783245269300,
+                                   3492912640500734004, 7903591215281799872, 4616161610863395412, 875020887047334808,
+                                   2721628497281503934, 6882639287577667047, 6274957618887284536, 3575443754501116278,
+                                   2031604067526359716, 4433373641914130623, 6204772769819600658, 8509292558066435714,
+                                   1857073904365563798, 7875287918949902618, 5205034693823928900, 4943396962875355147,
+                                   2805601192218759148, 8976171820624983460, 5930936964665834653, 949687393644726240,
+                                   6466615045398392331, 423404729770342491, 2720698610804800422, 7479269416044676778,
+                                   7869290888646534505, 6327163107872545492, 476579447640475544, 1218066186129904051,
+                                   7630726053076756205, 7741086216563432736, 5225376670650457287, 7040078265943665053,
+                                   2162853338175426448, 5633819254572300801, 92334600849454176, 9098183941628882647,
+                                   3481731752092062852, 5473741255745389738, 7266470290696653678, 3090338455353169956,
+                                   4358343354422765853, 3623553173494979282, 8328390749513844747, 2287762878756609646,
+                                   4126189061710502597, 5829472669961813184, 7342395882491704275, 5030578088617810038,
+                                   2210525427289006508, 6161187897225224000, 5601573223749212224, 6539026784581543793,
+                                   3571032801838391198, 4813662449014287760, 6577243754700968179, 4401899289452367605,
+                                   305529480505303551, 1548494450097231731, 6926707725781258948, 6357305518384676781,
+                                   6357665620505806556, 1554358231697328409, 7871587375269472810, 2094942344314098945,
+                                   1452972368095860063, 3210274450167364491, 6901356410911155351, 7609098874470545378,
+                                   6955802737127492446, 6919896432783547538, 5423154486785623318, 3105394980859157674,
+                                   8438962979748731599, 4110730383299136510, 6718356757580670867]):
         """
         Learning parity with noise: Given binary vectors, find the secret set $S$ of indices such that, for at least
         3/4 of the vectors, $$sum_{i \in S} x_i = 1 (mod 2)$$
@@ -88,7 +148,7 @@ def sat(inds: List[int], vecs=[26, 5, 32, 3, 15, 18, 31, 13, 24, 25, 34, 5, 15,
     @staticmethod
     def sol(vecs):
         # brute force
-        d = 0 # decode vectors into arrays
+        d = 0  # decode vectors into arrays
         m = max(vecs)
         while m:
             m >>= 1
@@ -98,26 +158,42 @@ def sol(vecs):
         import random
         rand = random.Random(0)
         target = (len(vecs) * 3) // 4
-        while True:
+        max_attempts = 10**4
+        for _ in range(max_attempts):
             ans = [i for i in range(d) if rand.randrange(2)]
             if sum(sum(v[i] for i in ans) % 2 for v in vecs) >= len(vecs) * 3 / 4:
                 return ans
 
-    def gen_random(self):
-        d = self.random.randrange(2, self.random.choice([11, 100]))  # number of dimensions
+    @staticmethod
+    def rand_parity_problem(rand, d=63):
+        """
+        Generate 2 * d + 5 random d-bit vectors (encoded as ints) whose parity over a hidden non-empty index set
+        is odd, then flip that parity in a random sample of fewer than 1/4 of the vectors (the noise).
+        """
         secret = None
         while not secret:
-            secret = [i for i in range(d) if self.random.randrange(2)]
-        num_vecs = self.random.randrange(2 * d, 10 * d)
-        vecs = [[self.random.randrange(2) for _ in range(d)] for i in range(num_vecs)]
+            secret = [i for i in range(d) if rand.randrange(2)]
+        num_vecs = 2 * d + 5
+        vecs = [[rand.randrange(2) for _ in range(d)] for i in range(num_vecs)]
         for v in vecs:
             v[secret[0]] = (1 + sum([v[i] for i in secret[1:]])) % 2
-        mistakes = self.random.sample(vecs, int(len(vecs) * self.random.random() * 1 / 4))
+        mistakes = rand.sample(vecs, int(len(vecs) * rand.random() * 1 / 4))
         for v in mistakes:
             v[secret[0]] ^= 1  # flip bit in mistakes
-        vecs =  [sum(1 << i for i, b in enumerate(v) if b) for v in vecs] # encode into ints
-        self.add(dict(vecs=vecs), test=d < 19)
+        vecs = [sum(1 << i for i, b in enumerate(v) if b) for v in vecs]  # encode into ints
+        return vecs
+
+    def testable(self, inp: dict):
+        """Test only instances whose encoded vectors are all below 2 ** 20 (i.e., fit in 20 bits)."""
+        return max(inp["vecs"]) < 2 ** 20
 
+    def gen_random(self):
+        d = self.random.randrange(2, self.random.choice([11, 100]))  # number of dimensions
+        vecs = self.rand_parity_problem(
+            self.random,
+            d=d
+        )
+        self.add(dict(vecs=vecs), test=d < 19)
 
 
 if __name__ == "__main__":
diff --git a/make_dataset.py b/make_dataset.py
@@ -223,7 +223,7 @@ def main(args):
     utils.save_json(puzzles, args.json, make_dirs_if_necessary=True, indent=2)
     save_readme(summaries, args.readme, args.solutions)
     utils.info(f"Elapsed time: {(time.perf_counter() - start_time) / 60:.2f} minutes")
-    utils.info(f"Saved {len(puzzles)} to {args.json} and {args.readme}")
+    utils.info(f"Saved {len(puzzles):,} to {args.json} and {args.readme}")
 
 
 if __name__ == "__main__":
diff --git a/puzzle_generator.py b/puzzle_generator.py
@@ -721,7 +721,7 @@ def check_for_trivial_solutions(self, force, already_tested):  # check for trivi
                        f"has trivial solution `{t}`")
             break
         dur = time.perf_counter() - time0
-        if dur > 1.0:
+        if dur > 1.0 * self.multiplier:  # warn if above one second, scaled by self.multiplier
             utils.warn(f"Took {dur:.1f}s to test for trivial solutions to `{self.name}`")
 
     def gen(self, target_num_instances):
@@ -730,23 +730,12 @@ def gen(self, target_num_instances):
     def gen_random(self):
         pass
 
-    # def check_seen_input(self, inp):
-    #     """
-    #     Returns True if the input is a duplicate of a previous puzzle, and also makes sure that the types match
-    #     """
-    #     s = str(inp)
-    #     if s in self._seen_problems:
-    #         return True  # duplicate problem
-    #
-    #     self._seen_problems.add(s)
-    #
-    #     assert set(inp) == set(self.arg_names), f"Instance #{len(self.instances)} keys mismatch in {self.name}"
-    #     example = self.get_example()
-    #     for k in inp:
-    #         v1, v2 = example[k], inp[k]
-    #         assert same_types(v1, v2), f"Instance #{len(self.instances)} variable `{k}` type mismatch in {self.name}"
-    #
-    #     return False
+    def testable(self, inp: dict):
+        """Override this to ensure that certain examples are not tested.
+        This is the only way to make sure the *example* is not tested.
+        For other instances, you can also avoid testing by calling .add(inp, test=False)
+        """
+        return True
 
     def add(self, inp: dict, test=True):
         s = str(inp)
@@ -762,7 +751,7 @@ def add(self, inp: dict, test=True):
             if not same_types(v1, v2):
                 utils.warn(f"Instance #{self.num_generated_so_far()} variable `{k}` type mismatch in {self.name}")
 
-        self._inputs.append((inp, test))
+        self._inputs.append((inp, test and self.testable(inp)))
 
     # zzzz
     # def add(self, inp: dict, test=True):
diff --git a/puzzles/README.md b/puzzles/README.md
diff --git a/puzzles/puzzles.json b/puzzles/puzzles.json