Code-related prompts (#714)

shakedbr · VictorSanh · web-flow · commit 33029c3e07e0 · 2022-02-14T16:00:34.000-05:00
* code prompts

* code_x_glue_tc_text_to_code templates

* neural_code_search prompts

* Update promptsource/templates/code_x_glue_tc_text_to_code/templates.yaml

Co-authored-by: Victor SANH <victorsanh@gmail.com>

* Update promptsource/templates/great_code/templates.yaml

Co-authored-by: Victor SANH <victorsanh@gmail.com>

* Update promptsource/templates/great_code/templates.yaml

Co-authored-by: Victor SANH <victorsanh@gmail.com>

* Update promptsource/templates/great_code/templates.yaml

Co-authored-by: Victor SANH <victorsanh@gmail.com>

* Update promptsource/templates/neural_code_search/evaluation_dataset/templates.yaml

Co-authored-by: Victor SANH <victorsanh@gmail.com>

* Update promptsource/templates/openai_humaneval/templates.yaml

Co-authored-by: Victor SANH <victorsanh@gmail.com>

* Update promptsource/templates/openai_humaneval/templates.yaml

Co-authored-by: Victor SANH <victorsanh@gmail.com>

* Update promptsource/templates/great_code/templates.yaml

Co-authored-by: Victor SANH <victorsanh@gmail.com>

* Update promptsource/templates/great_code/templates.yaml

Co-authored-by: Victor SANH <victorsanh@gmail.com>

* Update promptsource/templates/great_code/templates.yaml

Co-authored-by: Victor SANH <victorsanh@gmail.com>

* Update promptsource/templates/neural_code_search/evaluation_dataset/templates.yaml

Co-authored-by: Victor SANH <victorsanh@gmail.com>

* small fixes

* many fixes

* bug fix

Co-authored-by: Victor SANH <victorsanh@gmail.com>
diff --git a/promptsource/templates/code_x_glue_tc_text_to_code/templates.yaml b/promptsource/templates/code_x_glue_tc_text_to_code/templates.yaml
@@ -0,0 +1,26 @@
+dataset: code_x_glue_tc_text_to_code
+templates:
+  eb965448-691e-4506-bb61-a54771c7014b: !Template  # description + class fields/methods -> `code`
+    answer_choices: null  # no fixed answer set; the target is free-form
+    id: eb965448-691e-4506-bb61-a54771c7014b  # same UUID as the mapping key above
+    jinja: "{% set field_seperator = \"concode_field_sep\" %}\n{% set method_seperator\
+      \ = \"concode_elem_sep\" %}\n{% set ns = namespace(nl=\"\", fields=[], methods=[])\
+      \ %}\n{% if code | length > 0 %}\n\n    {% for chunk_a in nl.split(field_seperator)\
+      \  %}\n        {% set first_iter = loop.index0 == 0 %}\n        {% for chunk_b\
+      \ in chunk_a.split(method_seperator)  %}\n            {% if loop.index0 == 0\
+      \ and first_iter %}\n                {% set ns.nl = chunk_b %}\n           \
+      \ {% elif loop.index0 == 0 %}\n                {% set ns.fields = ns.fields\
+      \ + [chunk_b.strip()] %}\n            {% else %}\n                {% set ns.methods\
+      \ = ns.methods + [chunk_b.strip()] %}\n            {% endif %}\n        {% endfor\
+      \ %}\n    {% endfor %}\n    Method description:\n    {{ns.nl}}\n\n    Class\
+      \ fields:\n    {{ns.fields | unique | join(\", \")}}\n\n    Class methods:\n\
+      \    {{ns.methods | unique | join(\", \")}}\n\n    Generate the method\n   \
+      \ |||\n    {{code}}\n{% endif %}"  # splits `nl` on the two separator markers; renders only when `code` is non-empty
+    metadata: !TemplateMetadata
+      choices_in_prompt: false  # answer_choices is null, so there is nothing to list
+      metrics:
+      - Accuracy
+      - BLEU
+      original_task: true
+    name: generate class member function given class environment
+    reference: ''  # empty string: no source is cited for this prompt
diff --git a/promptsource/templates/great_code/templates.yaml b/promptsource/templates/great_code/templates.yaml
@@ -0,0 +1,153 @@
+dataset: great_code
+templates:
+  027215bb-1055-4584-b3ce-3267a8043d3a: !Template  # masks the first `def` header; the target is the hidden name
+    answer_choices: null  # free-form target, no fixed choices
+    id: 027215bb-1055-4584-b3ce-3267a8043d3a
+    jinja: "{% set mask = 'def <FUNC_NAME> (' %}\n{% set indent = '    ' %}\n{% set\
+      \ ns = namespace(indent_size=0, result=[], masked=false, target='') %}\n{% for\
+      \ token in source_tokens  %}\n    {% if ns.masked is false and token.startswith('def ')\
+      \ %}\n            {% set ns.target = token.split('def ')[1][:-1] %}\n      \
+      \      {% set token = mask %}\n            {% set ns.masked = true %}\n    {%\
+      \ endif%}\n    {% if token== '#INDENT#' %}\n       {% set ns.indent_size = ns.indent_size\
+      \ + 1 %}\n       {% set ns.result = ns.result + [indent * ns.indent_size] %}\n\
+      \    {% elif token == '#NEWLINE#' %}\n          {% set ns.result = ns.result\
+      \ + [\"\\n\"] %}\n    {% elif token == '#UNINDENT#' %}\n        {% set ns.indent_size\
+      \ = ns.indent_size - 1 %}\n    {% else %}\n          {% if not loop.first and\
+      \ loop.previtem == '#NEWLINE#' %}\n               {% set ns.result = ns.result\
+      \ + [indent * ns.indent_size] %}\n          {% endif %}\n          {% set ns.result\
+      \ = ns.result + [token | replace('\\\\n', '\\n'), \" \"] %}\n    {% endif %}\n\
+      {% endfor %}\n{{ns.result | join(\"\") | replace(\" . \", \".\") | replace(\"\
+      \ , \", \", \")  | replace(\"( \", \"(\") | replace(\" )\", \")\") | replace(\"\
+      [ \", \"[\") | replace(\" ]\", \"]\")}}\n\nWhat is the function name?\n|||\n\
+      {{ ns.target }}"  # header test uses 'def ' (with the space) so it agrees with the split('def ') that extracts the name
+    metadata: !TemplateMetadata
+      choices_in_prompt: false  # answer_choices is null, so there is nothing to list
+      metrics:
+      - Accuracy
+      original_task: false
+    name: function name generation
+    reference: ''
+  24fa908d-daa2-4fd2-bb22-374021c1fc23: !Template  # shows the code, then asks for the corrected version of the buggy line
+    answer_choices: null  # free-form target, no fixed choices
+    id: 24fa908d-daa2-4fd2-bb22-374021c1fc23
+    jinja: "{% set result = \"\" %}\n{% set indent = '    ' %}\n{% set ns = namespace(indent_size=0,\
+      \ line_number=0, buggy_line=0, bug_location=0, bug_len=0, result=[], result_lines=[])\
+      \ %}\n{% set fixed_token = source_tokens[repair_targets[0]] %}\n{% set buggy_line_content\
+      \ = \"\" %}\n{% set fixed_buggy_line_content = \"\" %}\n\n{% if has_bug and\
+      \ (repair_targets | length > 0) %}\n    {% for token in source_tokens  %}\n\
+      \        {% if token== '#INDENT#' %}\n           {% set ns.indent_size = ns.indent_size\
+      \ + 1 %}\n           {% set ns.result = ns.result + [indent * ns.indent_size]\
+      \ %}\n        {% elif token == '#NEWLINE#' %}\n              {% set ns.result_lines\
+      \ = ns.result_lines + [ns.result | join(\"\")] %}\n              {% set ns.result\
+      \ = [] %}\n              {% set ns.line_number = ns.line_number + 1 %}\n\
+      \        {% elif token == '#UNINDENT#' %}\n            {% set ns.indent_size\
+      \ = ns.indent_size - 1 %}\n        {% else %}\n              {% if not loop.first\
+      \ and loop.previtem == '#NEWLINE#' %}\n                   {% set ns.result =\
+      \ ns.result + [indent * ns.indent_size] %}\n              {% endif %}\n\
+      \              {% if loop.index0 == error_location %}\n                   {% set\
+      \ ns.buggy_line = ns.line_number %}\n                   {% set ns.bug_location\
+      \ = (ns.result | join(\"\") | length) %}\n                   {% set ns.bug_len\
+      \ = (token | length) %}\n              {% endif %}\n              {% set ns.result\
+      \ = ns.result + [token | replace('\\\\n', '\\n'), \" \"] %}\n        {% endif %}\n    {%\
+      \ endfor %}\n    {% set ns.result_lines = ns.result_lines + [ns.result | join(\"\
+      \")] %}\n    {% set result = ns.result_lines | join(\"\\n\") %}\n    {{result\
+      \ | replace(\" . \", \".\") | replace(\" , \", \", \")  | replace(\"( \", \"\
+      (\") | replace(\" )\", \")\") | replace(\"[ \", \"[\") | replace(\" ]\", \"\
+      ]\")}}\n\n    {% set buggy_line_content = ns.result_lines[ns.buggy_line] | trim\
+      \ | replace(\" . \", \".\") | replace(\" , \", \", \")  | replace(\"( \", \"\
+      (\") | replace(\" )\", \")\") | replace(\"[ \", \"[\") | replace(\" ]\", \"\
+      ]\") %}\n    {% set fixed_buggy_line_content = (ns.result_lines[ns.buggy_line][:ns.bug_location]\
+      \ + fixed_token +  ns.result_lines[ns.buggy_line][ns.bug_location + ns.bug_len:])\
+      \ | trim | replace(\" . \", \".\") | replace(\" , \", \", \")  | replace(\"\
+      ( \", \"(\") | replace(\" )\", \")\") | replace(\"[ \", \"[\") | replace(\"\
+      \ ]\", \"]\")%}\n\n    Fix the buggy line: {{buggy_line_content}}\n    |||\n\
+      \    {{fixed_buggy_line_content}}\n{% endif %}"  # bug_location is taken after the line indent is appended, so the splice offset includes the indent
+    metadata: !TemplateMetadata
+      choices_in_prompt: false  # answer_choices is null, so there is nothing to list
+      metrics:
+      - Other
+      original_task: false
+    name: fix buggy line
+    reference: ''
+  3c0f36da-834a-4814-86f6-6e55414fbd32: !Template  # replaces the token at error_location with <MASK>; target is the repair token
+    answer_choices: null  # free-form target, no fixed choices
+    id: 3c0f36da-834a-4814-86f6-6e55414fbd32
+    jinja: "{% set mask = '<MASK>' %}\n{% set indent = '    ' %}\n{% set ns = namespace(indent_size=0,\
+      \ result=[]) %}\n\n{% if has_bug and (repair_targets | length > 0) %}\n    {% for token in source_tokens  %}\n\
+      \        {% if loop.index0 == error_location %}\n                {% set token\
+      \ = mask %}\n        {% endif%}\n        {% if token== '#INDENT#' %}\n     \
+      \      {% set ns.indent_size = ns.indent_size + 1 %}\n           {% set ns.result\
+      \ = ns.result + [indent * ns.indent_size] %}\n        {% elif token == '#NEWLINE#'\
+      \ %}\n              {% set ns.result = ns.result + [\"\\n\"] %}\n        {%\
+      \ elif token == '#UNINDENT#' %}\n            {% set ns.indent_size = ns.indent_size\
+      \ - 1 %}\n        {% else %}\n              {% if not loop.first and loop.previtem\
+      \ == '#NEWLINE#' %}\n                   {% set ns.result = ns.result + [indent\
+      \ * ns.indent_size] %}\n              {% endif %}\n              {% set ns.result\
+      \ = ns.result + [token | replace('\\\\n', '\\n'), \" \"] %}\n        {% endif\
+      \ %}\n    {% endfor %}\n    {{ns.result | join(\"\") | replace(\" . \", \".\"\
+      ) | replace(\" , \", \", \")  | replace(\"( \", \"(\") | replace(\" )\", \"\
+      )\") | replace(\"[ \", \"[\") | replace(\" ]\", \"]\")}}\n    \n    Given the\
+      \ code above, what is a proper replacement for {{mask}}?\n    |||\n    {{source_tokens[repair_targets[0]]}}\n\
+      {% endif %}"  # renders only when has_bug is set and repair_targets is non-empty, because the target reads repair_targets[0]
+    metadata: !TemplateMetadata
+      choices_in_prompt: false  # answer_choices is null, so there is nothing to list
+      metrics:
+      - Accuracy
+      original_task: true
+    name: identifier prediction no choices
+    reference: ''
+  57f93ca1-1e27-40ff-8fa4-ba11a499baef: !Template  # renders the code and asks whether it contains a bug
+    answer_choices: Yes ||| No  # index 0 is used when has_bug is true, index 1 otherwise
+    id: 57f93ca1-1e27-40ff-8fa4-ba11a499baef
+    jinja: "{% set indent = '    ' %}\n{% set ns = namespace(indent_size=0, result=[])\
+      \ %}\n{% for token in source_tokens  %}\n    {% if token== '#INDENT#' %}\n \
+      \      {% set ns.indent_size = ns.indent_size + 1 %}\n       {% set ns.result\
+      \ = ns.result + [indent * ns.indent_size] %}\n    {% elif token == '#NEWLINE#'\
+      \ %}\n          {% set ns.result = ns.result + [\"\\n\"] %}\n    {% elif token\
+      \ == '#UNINDENT#' %}\n        {% set ns.indent_size = ns.indent_size - 1 %}\n\
+      \    {% else %}\n          {% if not loop.first and loop.previtem == '#NEWLINE#'\
+      \ %}\n               {% set ns.result = ns.result + [indent * ns.indent_size]\
+      \ %}\n          {% endif %}\n          {% set ns.result = ns.result + [token\
+      \ | replace('\\\\n', '\\n'), \" \"] %}\n    {% endif %}\n{% endfor %}\n{{ns.result\
+      \ | join(\"\") | replace(\" . \", \".\") | replace(\" , \", \", \")  | replace(\"\
+      ( \", \"(\") | replace(\" )\", \")\") | replace(\"[ \", \"[\") | replace(\"\
+      \ ]\", \"]\")}}\n\nIs there a bug in the code above?\n|||\n{{ answer_choices[0]\
+      \ if has_bug else answer_choices[1] }}"  # target is taken from answer_choices rather than repeating the strings
+    metadata: !TemplateMetadata
+      choices_in_prompt: false  # the prompt text never lists "Yes" / "No"
+      metrics:
+      - Accuracy
+      original_task: false
+    name: bug detection
+    reference: ''
+  6b154f4f-50a5-4890-b0fc-a0118061ce0c: !Template  # masked-token prompt that also lists the candidate tokens
+    answer_choices: '{% if has_bug %}     {% set nss = namespace(choices=[]) %}     {%
+      for i in repair_candidates  %}         {% set nss.choices = nss.choices + [source_tokens[(i
+      | int)]] %}     {% endfor %}     {{nss.choices | unique | join(" ||| ")}} {%
+      endif %}'
+    id: 6b154f4f-50a5-4890-b0fc-a0118061ce0c
+    jinja: "{% set mask = '<MASK>' %}\n{% set indent = '    ' %}\n{% set ns = namespace(indent_size=0,\
+      \ result=[]) %}\n\n{% if has_bug and (repair_targets | length > 0) %}\n    {% for token in source_tokens  %}\n\
+      \        {% if loop.index0 == error_location %}\n                {% set token\
+      \ = mask %}\n        {% endif%}\n        {% if token== '#INDENT#' %}\n     \
+      \      {% set ns.indent_size = ns.indent_size + 1 %}\n           {% set ns.result\
+      \ = ns.result + [indent * ns.indent_size] %}\n        {% elif token == '#NEWLINE#'\
+      \ %}\n              {% set ns.result = ns.result + [\"\\n\"] %}\n        {%\
+      \ elif token == '#UNINDENT#' %}\n            {% set ns.indent_size = ns.indent_size\
+      \ - 1 %}\n        {% else %}\n              {% if not loop.first and loop.previtem\
+      \ == '#NEWLINE#' %}\n                   {% set ns.result = ns.result + [indent\
+      \ * ns.indent_size] %}\n              {% endif %}\n              {% set ns.result\
+      \ = ns.result + [token | replace('\\\\n', '\\n'), \" \"] %}\n        {% endif\
+      \ %}\n    {% endfor %}\n    {{ns.result | join(\"\") | replace(\" . \", \".\"\
+      ) | replace(\" , \", \", \")  | replace(\"( \", \"(\") | replace(\" )\", \"\
+      )\") | replace(\"[ \", \"[\") | replace(\" ]\", \"]\")}}\n    \n    Given the\
+      \ code above, what is a proper replacement for {{mask}}? Choose among: {{answer_choices\
+      \ | join(\", \")}}\n    |||\n    {{source_tokens[repair_targets[0]]}}\n{% endif\
+      \ %}"  # renders only when has_bug is set and repair_targets is non-empty, because the target reads repair_targets[0]
+    metadata: !TemplateMetadata
+      choices_in_prompt: true  # the prompt prints answer_choices after "Choose among:"
+      metrics:
+      - Accuracy
+      original_task: false
+    name: identifier prediction with choices
+    reference: ''
diff --git a/promptsource/templates/neural_code_search/evaluation_dataset/templates.yaml b/promptsource/templates/neural_code_search/evaluation_dataset/templates.yaml
@@ -0,0 +1,47 @@
+dataset: neural_code_search
+subset: evaluation_dataset
+templates:
+  30858249-c732-46a6-85b5-466fe964c4d4: !Template
+    # Prompt that prints the `question` field under a "Description:" heading
+    # and, after the `|||` separator, the `answer` field.
+    answer_choices: null
+    id: 30858249-c732-46a6-85b5-466fe964c4d4
+    # Literal block scalar: every line below is one line of the template and
+    # the empty line is kept as an empty template line. Clip chomping (`|`)
+    # keeps exactly one trailing newline after `{{ answer }}`.
+    jinja: |
+      Description:
+      {{ question }}
+
+      Implementation:
+      |||
+      {{ answer }}
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      metrics:
+      - BLEU
+      - ROUGE
+      - Other
+      original_task: false
+    name: generate code given a description
+    reference: ''
+  34f4095d-0ce0-42d5-8070-1626dd51b987: !Template
+    # Reverse direction: prints the `answer` field as the code to describe
+    # and, after the `|||` separator, the `question` field.
+    answer_choices: null
+    id: 34f4095d-0ce0-42d5-8070-1626dd51b987
+    # Strip chomping (`|-`): the template ends right after `{{ question }}`.
+    jinja: |-
+      Given the following code:
+      {{ answer }}
+      Describe it:
+      |||
+      {{ question }}
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      metrics:
+      - BLEU
+      - ROUGE
+      original_task: false
+    name: generate a description given code
+    reference: ''
diff --git a/promptsource/templates/openai_humaneval/templates.yaml b/promptsource/templates/openai_humaneval/templates.yaml
@@ -0,0 +1,39 @@
+dataset: openai_humaneval
+templates:
+  4a108b1c-7514-488f-99ed-3ca5da70e103: !Template
+    # Prints the `prompt` field plus a question; `canonical_solution` follows `|||`.
+    answer_choices: null
+    id: 4a108b1c-7514-488f-99ed-3ca5da70e103
+    # Strip chomping (`|-`): no newline after `{{ canonical_solution }}`.
+    jinja: |-
+      {{ prompt }}
+      Given the following docstring, what is the function body?
+      |||
+      {{ canonical_solution }}
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      metrics:
+      - Other
+      original_task: true
+    name: function body generation
+    reference: ''
+  9c85c898-70fe-4a51-be37-5111be357762: !Template  # picks one `assert ... == ...` line from `test`; target is its right-hand side
+    answer_choices: null  # free-form target, no fixed choices
+    id: 9c85c898-70fe-4a51-be37-5111be357762
+    jinja: "{% set ns = namespace(tests=[])%}\n{% set lines = test.split('\\n') %}\n\
+      {% set test_ = \"\" %}\n{% set args = \"\" %}\n{% set return_val = \"\" %}\n\
+      \n{% for line in lines %}\n    {% if line.strip().startswith('assert') and \"\
+      ==\" in line.strip() %}\n        {% set ns.tests = ns.tests + [line.split('assert')[1]]\
+      \ %}\n    {% endif %}\n{% endfor %}\n{% if (ns.tests | length) > 0 %}\n    {%\
+      \ set test_ = ns.tests | choice  %}\n\n    {% set return_val = test_.split(\"\
+      ==\")[1].split(\", \\\"\")[0].strip() %}\n    {% set args = (test_.split('(')[1:]\
+      \ | join(\"(\")).split(\"==\")[0].strip() %}\n    {{ prompt }}\n    {{ canonical_solution\
+      \ }}\n    {{entry_point}}({{args}} =\n    |||\n    {{ return_val }}\n{% endif\
+      \ %}\n"  # the pieces after the first "(" are re-joined with "(" so nested parentheses in the arguments survive
+    metadata: !TemplateMetadata
+      choices_in_prompt: false  # answer_choices is null, so there is nothing to list
+      metrics:
+      - Other
+      original_task: false
+    name: function call return value generation
+    reference: ''