Skip to content

Commit 33029c3

Browse files
shakedbr and VictorSanh authored
Code-related prompts (#714)
* code prompts * code_x_glue_tc_text_to_code templates * neural_code_search prompts * Update promptsource/templates/code_x_glue_tc_text_to_code/templates.yaml Co-authored-by: Victor SANH <victorsanh@gmail.com> * Update promptsource/templates/great_code/templates.yaml Co-authored-by: Victor SANH <victorsanh@gmail.com> * Update promptsource/templates/great_code/templates.yaml Co-authored-by: Victor SANH <victorsanh@gmail.com> * Update promptsource/templates/great_code/templates.yaml Co-authored-by: Victor SANH <victorsanh@gmail.com> * Update promptsource/templates/neural_code_search/evaluation_dataset/templates.yaml Co-authored-by: Victor SANH <victorsanh@gmail.com> * Update promptsource/templates/openai_humaneval/templates.yaml Co-authored-by: Victor SANH <victorsanh@gmail.com> * Update promptsource/templates/openai_humaneval/templates.yaml Co-authored-by: Victor SANH <victorsanh@gmail.com> * Update promptsource/templates/great_code/templates.yaml Co-authored-by: Victor SANH <victorsanh@gmail.com> * Update promptsource/templates/great_code/templates.yaml Co-authored-by: Victor SANH <victorsanh@gmail.com> * Update promptsource/templates/great_code/templates.yaml Co-authored-by: Victor SANH <victorsanh@gmail.com> * Update promptsource/templates/neural_code_search/evaluation_dataset/templates.yaml Co-authored-by: Victor SANH <victorsanh@gmail.com> * small fixes * many fixes * bug fix Co-authored-by: Victor SANH <victorsanh@gmail.com>
1 parent c115744 commit 33029c3

File tree

4 files changed

+265
-0
lines changed

4 files changed

+265
-0
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
dataset: code_x_glue_tc_text_to_code
2+
templates:
3+
eb965448-691e-4506-bb61-a54771c7014b: !Template
4+
answer_choices: null
5+
id: eb965448-691e-4506-bb61-a54771c7014b
6+
jinja: "{% set field_seperator = \"concode_field_sep\" %}\n{% set method_seperator\
7+
\ = \"concode_elem_sep\" %}\n{% set ns = namespace(nl=\"\", fields=[], methods=[])\
8+
\ %}\n{% if code | length > 0 %}\n\n {% for chunk_a in nl.split(field_seperator)\
9+
\ %}\n {% set first_iter = loop.index0 == 0 %}\n {% for chunk_b\
10+
\ in chunk_a.split(method_seperator) %}\n {% if loop.index0 == 0\
11+
\ and first_iter %}\n {% set ns.nl = chunk_b %}\n \
12+
\ {% elif loop.index0 == 0 %}\n {% set ns.fields = ns.fields\
13+
\ + [chunk_b.strip()] %}\n {% else %}\n {% set ns.methods\
14+
\ = ns.methods + [chunk_b.strip()] %}\n {% endif %}\n {% endfor\
15+
\ %}\n {% endfor %}\n Method description:\n {{ns.nl}}\n\n Class\
16+
\ fields:\n {{ns.fields | unique | join(\", \")}}\n\n Class methods:\n\
17+
\ {{ns.methods | unique | join(\", \")}}\n\n Generate the method\n \
18+
\ |||\n {{code}}\n{% endif %}"
19+
metadata: !TemplateMetadata
20+
choices_in_prompt: false
21+
metrics:
22+
- Accuracy
23+
- BLEU
24+
original_task: true
25+
name: generate class member function given class environment
26+
reference: ''
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
dataset: great_code
2+
templates:
3+
027215bb-1055-4584-b3ce-3267a8043d3a: !Template
4+
answer_choices: null
5+
id: 027215bb-1055-4584-b3ce-3267a8043d3a
6+
jinja: "{% set mask = 'def <FUNC_NAME> (' %}\n{% set indent = ' ' %}\n{% set\
7+
\ ns = namespace(indent_size=0, result=[], masked=false, target='') %}\n{% for\
8+
\ token in source_tokens %}\n {% if ns.masked is false and token.startswith('def')\
9+
\ %}\n {% set ns.target = token.split('def ')[1][:-1] %}\n \
10+
\ {% set token = mask %}\n {% set ns.masked = true %}\n {%\
11+
\ endif%}\n {% if token== '#INDENT#' %}\n {% set ns.indent_size = ns.indent_size\
12+
\ + 1 %}\n {% set ns.result = ns.result + [indent * ns.indent_size] %}\n\
13+
\ {% elif token == '#NEWLINE#' %}\n {% set ns.result = ns.result\
14+
\ + [\"\\n\"] %}\n {% elif token == '#UNINDENT#' %}\n {% set ns.indent_size\
15+
\ = ns.indent_size - 1 %}\n {% else %}\n {% if not loop.first and\
16+
\ loop.previtem == '#NEWLINE#' %}\n {% set ns.result = ns.result\
17+
\ + [indent * ns.indent_size] %}\n {% endif %}\n {% set ns.result\
18+
\ = ns.result + [token | replace('\\\\n', '\\n'), \" \"] %}\n {% endif %}\n\
19+
{% endfor %}\n{{ns.result | join(\"\") | replace(\" . \", \".\") | replace(\"\
20+
\ , \", \", \") | replace(\"( \", \"(\") | replace(\" )\", \")\") | replace(\"\
21+
[ \", \"[\") | replace(\" ]\", \"]\")}}\n\nWhat is the function name?\n|||\n\
22+
{{ ns.target }}"
23+
metadata: !TemplateMetadata
24+
choices_in_prompt: false
25+
metrics:
26+
- Accuracy
27+
original_task: false
28+
name: function name generation
29+
reference: ''
30+
24fa908d-daa2-4fd2-bb22-374021c1fc23: !Template
31+
answer_choices: null
32+
id: 24fa908d-daa2-4fd2-bb22-374021c1fc23
33+
jinja: "{% set result = \"\" %}\n{% set indent = ' ' %}\n{% set ns = namespace(indent_size=0,\
34+
\ line_number=0, buggy_line=0, bug_location=0, bug_len=0, result=[], result_lines=[])\
35+
\ %}\n{% set fixed_token = source_tokens[repair_targets[0]] %}\n{% set buggy_line_content\
36+
\ = \"\" %}\n{% set fixed_buggy_line_content = \"\" %}\n\n{% if has_bug and\
37+
\ (repair_targets | length > 0) %}\n {% for token in source_tokens %}\n\
38+
\ {% if loop.index0 == error_location %}\n {% set ns.buggy_line\
39+
\ = ns.line_number %}\n {% set ns.bug_location = (ns.result |\
40+
\ join(\"\") | length) %}\n {% set ns.bug_len = (token | length)\
41+
\ %}\n {% endif%}\n {% if token== '#INDENT#' %}\n {%\
42+
\ set ns.indent_size = ns.indent_size + 1 %}\n {% set ns.result =\
43+
\ ns.result + [indent * ns.indent_size] %}\n {% elif token == '#NEWLINE#'\
44+
\ %}\n {% set ns.result_lines = ns.result_lines + [ns.result |\
45+
\ join(\"\")] %}\n {% set ns.result = [] %}\n {% set\
46+
\ ns.line_number = ns.line_number + 1 %}\n {% elif token == '#UNINDENT#'\
47+
\ %}\n {% set ns.indent_size = ns.indent_size - 1 %}\n {%\
48+
\ else %}\n {% if not loop.first and loop.previtem == '#NEWLINE#'\
49+
\ %}\n {% set ns.result = ns.result + [indent * ns.indent_size]\
50+
\ %}\n {% endif %}\n {% set ns.result = ns.result\
51+
\ + [token | replace('\\\\n', '\\n'), \" \"] %}\n {% endif %}\n {%\
52+
\ endfor %}\n {% set ns.result_lines = ns.result_lines + [ns.result | join(\"\
53+
\")] %}\n {% set result = ns.result_lines | join(\"\\n\") %}\n {{result\
54+
\ | replace(\" . \", \".\") | replace(\" , \", \", \") | replace(\"( \", \"\
55+
(\") | replace(\" )\", \")\") | replace(\"[ \", \"[\") | replace(\" ]\", \"\
56+
]\")}}\n\n {% set buggy_line_content = ns.result_lines[ns.buggy_line] | trim\
57+
\ | replace(\" . \", \".\") | replace(\" , \", \", \") | replace(\"( \", \"\
58+
(\") | replace(\" )\", \")\") | replace(\"[ \", \"[\") | replace(\" ]\", \"\
59+
]\") %}\n {% set fixed_buggy_line_content = (ns.result_lines[ns.buggy_line][:ns.bug_location]\
60+
\ + fixed_token + ns.result_lines[ns.buggy_line][ns.bug_location + ns.bug_len:])\
61+
\ | trim | replace(\" . \", \".\") | replace(\" , \", \", \") | replace(\"\
62+
( \", \"(\") | replace(\" )\", \")\") | replace(\"[ \", \"[\") | replace(\"\
63+
\ ]\", \"]\")%}\n\n Fix the buggy line: {{buggy_line_content}}\n |||\n\
64+
\ {{fixed_buggy_line_content}}\n{% endif %}"
65+
metadata: !TemplateMetadata
66+
choices_in_prompt: false
67+
metrics:
68+
- Other
69+
original_task: false
70+
name: fix buggy line
71+
reference: ''
72+
3c0f36da-834a-4814-86f6-6e55414fbd32: !Template
73+
answer_choices: null
74+
id: 3c0f36da-834a-4814-86f6-6e55414fbd32
75+
jinja: "{% set mask = '<MASK>' %}\n{% set indent = ' ' %}\n{% set ns = namespace(indent_size=0,\
76+
\ result=[]) %}\n\n{% if has_bug %}\n {% for token in source_tokens %}\n\
77+
\ {% if loop.index0 == error_location %}\n {% set token\
78+
\ = mask %}\n {% endif%}\n {% if token== '#INDENT#' %}\n \
79+
\ {% set ns.indent_size = ns.indent_size + 1 %}\n {% set ns.result\
80+
\ = ns.result + [indent * ns.indent_size] %}\n {% elif token == '#NEWLINE#'\
81+
\ %}\n {% set ns.result = ns.result + [\"\\n\"] %}\n {%\
82+
\ elif token == '#UNINDENT#' %}\n {% set ns.indent_size = ns.indent_size\
83+
\ - 1 %}\n {% else %}\n {% if not loop.first and loop.previtem\
84+
\ == '#NEWLINE#' %}\n {% set ns.result = ns.result + [indent\
85+
\ * ns.indent_size] %}\n {% endif %}\n {% set ns.result\
86+
\ = ns.result + [token | replace('\\\\n', '\\n'), \" \"] %}\n {% endif\
87+
\ %}\n {% endfor %}\n {{ns.result | join(\"\") | replace(\" . \", \".\"\
88+
) | replace(\" , \", \", \") | replace(\"( \", \"(\") | replace(\" )\", \"\
89+
)\") | replace(\"[ \", \"[\") | replace(\" ]\", \"]\")}}\n \n Given the\
90+
\ code above, what is a proper replacement for {{mask}}?\n |||\n {{source_tokens[repair_targets[0]]}}\n\
91+
{% endif %}"
92+
metadata: !TemplateMetadata
93+
choices_in_prompt: false
94+
metrics:
95+
- Accuracy
96+
original_task: true
97+
name: identifier prediction no choices
98+
reference: ''
99+
57f93ca1-1e27-40ff-8fa4-ba11a499baef: !Template
100+
answer_choices: Yes ||| No
101+
id: 57f93ca1-1e27-40ff-8fa4-ba11a499baef
102+
jinja: "{% set indent = ' ' %}\n{% set ns = namespace(indent_size=0, result=[])\
103+
\ %}\n{% for token in source_tokens %}\n {% if token== '#INDENT#' %}\n \
104+
\ {% set ns.indent_size = ns.indent_size + 1 %}\n {% set ns.result\
105+
\ = ns.result + [indent * ns.indent_size] %}\n {% elif token == '#NEWLINE#'\
106+
\ %}\n {% set ns.result = ns.result + [\"\\n\"] %}\n {% elif token\
107+
\ == '#UNINDENT#' %}\n {% set ns.indent_size = ns.indent_size - 1 %}\n\
108+
\ {% else %}\n {% if not loop.first and loop.previtem == '#NEWLINE#'\
109+
\ %}\n {% set ns.result = ns.result + [indent * ns.indent_size]\
110+
\ %}\n {% endif %}\n {% set ns.result = ns.result + [token\
111+
\ | replace('\\\\n', '\\n'), \" \"] %}\n {% endif %}\n{% endfor %}\n{{ns.result\
112+
\ | join(\"\") | replace(\" . \", \".\") | replace(\" , \", \", \") | replace(\"\
113+
( \", \"(\") | replace(\" )\", \")\") | replace(\"[ \", \"[\") | replace(\"\
114+
\ ]\", \"]\")}}\n\nIs there a bug in the code above?\n|||\n{{ {True: \"Yes\"\
115+
, False: \"No\"}[has_bug] }}"
116+
metadata: !TemplateMetadata
117+
choices_in_prompt: true
118+
metrics:
119+
- Accuracy
120+
original_task: false
121+
name: bug detection
122+
reference: ''
123+
6b154f4f-50a5-4890-b0fc-a0118061ce0c: !Template
124+
answer_choices: '{% if has_bug %} {% set nss = namespace(choices=[]) %} {%
125+
for i in repair_candidates %} {% set nss.choices = nss.choices + [source_tokens[(i
126+
| int)]] %} {% endfor %} {{nss.choices | unique | join(" ||| ")}} {%
127+
endif %}'
128+
id: 6b154f4f-50a5-4890-b0fc-a0118061ce0c
129+
jinja: "{% set mask = '<MASK>' %}\n{% set indent = ' ' %}\n{% set ns = namespace(indent_size=0,\
130+
\ result=[]) %}\n\n{% if has_bug %}\n {% for token in source_tokens %}\n\
131+
\ {% if loop.index0 == error_location %}\n {% set token\
132+
\ = mask %}\n {% endif%}\n {% if token== '#INDENT#' %}\n \
133+
\ {% set ns.indent_size = ns.indent_size + 1 %}\n {% set ns.result\
134+
\ = ns.result + [indent * ns.indent_size] %}\n {% elif token == '#NEWLINE#'\
135+
\ %}\n {% set ns.result = ns.result + [\"\\n\"] %}\n {%\
136+
\ elif token == '#UNINDENT#' %}\n {% set ns.indent_size = ns.indent_size\
137+
\ - 1 %}\n {% else %}\n {% if not loop.first and loop.previtem\
138+
\ == '#NEWLINE#' %}\n {% set ns.result = ns.result + [indent\
139+
\ * ns.indent_size] %}\n {% endif %}\n {% set ns.result\
140+
\ = ns.result + [token | replace('\\\\n', '\\n'), \" \"] %}\n {% endif\
141+
\ %}\n {% endfor %}\n {{ns.result | join(\"\") | replace(\" . \", \".\"\
142+
) | replace(\" , \", \", \") | replace(\"( \", \"(\") | replace(\" )\", \"\
143+
)\") | replace(\"[ \", \"[\") | replace(\" ]\", \"]\")}}\n \n Given the\
144+
\ code above, what is a proper replacement for {{mask}}? Choose among: {{answer_choices\
145+
\ | join(\", \")}}\n |||\n {{source_tokens[repair_targets[0]]}}\n{% endif\
146+
\ %}"
147+
metadata: !TemplateMetadata
148+
choices_in_prompt: true
149+
metrics:
150+
- Accuracy
151+
original_task: false
152+
name: identifier prediction with choices
153+
reference: ''
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
dataset: neural_code_search
2+
subset: evaluation_dataset
3+
templates:
4+
30858249-c732-46a6-85b5-466fe964c4d4: !Template
5+
answer_choices: null
6+
id: 30858249-c732-46a6-85b5-466fe964c4d4
7+
jinja: 'Description:
8+
9+
{{ question }}
10+
11+
12+
Implementation:
13+
14+
|||
15+
16+
{{ answer }}
17+
18+
'
19+
metadata: !TemplateMetadata
20+
choices_in_prompt: false
21+
metrics:
22+
- BLEU
23+
- ROUGE
24+
- Other
25+
original_task: false
26+
name: generate code given a description
27+
reference: ''
28+
34f4095d-0ce0-42d5-8070-1626dd51b987: !Template
29+
answer_choices: null
30+
id: 34f4095d-0ce0-42d5-8070-1626dd51b987
31+
jinja: 'Given the following code:
32+
33+
{{ answer }}
34+
35+
Describe it:
36+
37+
|||
38+
39+
{{ question }}'
40+
metadata: !TemplateMetadata
41+
choices_in_prompt: false
42+
metrics:
43+
- BLEU
44+
- ROUGE
45+
original_task: false
46+
name: generate a description given code
47+
reference: ''
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
dataset: openai_humaneval
2+
templates:
3+
4a108b1c-7514-488f-99ed-3ca5da70e103: !Template
4+
answer_choices: null
5+
id: 4a108b1c-7514-488f-99ed-3ca5da70e103
6+
jinja: '{{ prompt }}
7+
8+
Given the docstring above, what is the function body?
9+
10+
|||
11+
12+
{{ canonical_solution }}'
13+
metadata: !TemplateMetadata
14+
choices_in_prompt: false
15+
metrics:
16+
- Other
17+
original_task: true
18+
name: function body generation
19+
reference: ''
20+
9c85c898-70fe-4a51-be37-5111be357762: !Template
21+
answer_choices: null
22+
id: 9c85c898-70fe-4a51-be37-5111be357762
23+
jinja: "{% set ns = namespace(tests=[])%}\n{% set lines = test.split('\\n') %}\n\
24+
{% set test_ = \"\" %}\n{% set args = \"\" %}\n{% set return_val = \"\" %}\n\
25+
\n{% for line in lines %}\n {% if line.strip().startswith('assert') and \"\
26+
==\" in line.strip() %}\n {% set ns.tests = ns.tests + [line.split('assert')[1]]\
27+
\ %}\n {% endif %}\n{% endfor %}\n{% if (ns.tests | length) > 0 %}\n {%\
28+
\ set test_ = ns.tests | choice %}\n\n {% set return_val = test_.split(\"\
29+
==\")[1].split(\", \\\"\")[0].strip() %}\n {% set args = (test_.split('(')[1:]\
30+
\ | join(\"\")).split(\"==\")[0].strip() %}\n {{ prompt }}\n {{ canonical_solution\
31+
\ }}\n {{entry_point}}({{args}} =\n |||\n {{ return_val }}\n{% endif\
32+
\ %}\n"
33+
metadata: !TemplateMetadata
34+
choices_in_prompt: false
35+
metrics:
36+
- Other
37+
original_task: false
38+
name: function call return value generation
39+
reference: ''

0 commit comments

Comments
 (0)