| |
| |
|
|
| from evaluator.CodeBLEU.parser import DFG_python, DFG_java, DFG_ruby, DFG_go, DFG_php, DFG_javascript, DFG_csharp |
| from evaluator.CodeBLEU.parser import (remove_comments_and_docstrings, |
| tree_to_token_index, |
| index_to_code_token, |
| tree_to_variable_index) |
| from tree_sitter import Language, Parser |
| import os |
|
|
# Directory that contains this module file.
root_dir = os.path.dirname(__file__)

# Lookup table from a language name to the matching DFG_* callable imported
# from evaluator.CodeBLEU.parser.
dfg_function = dict(
    python=DFG_python,
    java=DFG_java,
    ruby=DFG_ruby,
    go=DFG_go,
    php=DFG_php,
    javascript=DFG_javascript,
    c_sharp=DFG_csharp,
)
|
|
|
|
def calc_syntax_match(references, candidate, lang):
    """Compute the syntax-match score for a single candidate.

    ``references`` and ``candidate`` are each wrapped in a one-element list
    and handed to ``corpus_syntax_match`` together with ``lang``; that
    function's result is returned unchanged.
    """
    reference_groups = [references]
    candidate_list = [candidate]
    return corpus_syntax_match(reference_groups, candidate_list, lang)
|
|
|
|
def _strip_comments_or_keep(source, lang):
    """Return ``source`` with comments removed, or unchanged if removal fails."""
    try:
        return remove_comments_and_docstrings(source, lang)
    except Exception:
        # Best effort: code that cannot be cleaned is still scored as-is.
        return source


def _collect_subtree_sexps(root_node):
    """Return the S-expressions of the root and every descendant with children."""
    sexps = []
    pending = [root_node]
    while pending:
        node = pending.pop()
        sexps.append(node.sexp())
        # Childless nodes are not counted as sub-trees of their own.
        pending.extend(child for child in node.children if child.children)
    return sexps


def corpus_syntax_match(references, candidates, lang):
    """Compute the corpus-level syntax (sub-tree) match score.

    Each candidate is parsed with the tree-sitter grammar for ``lang`` and
    compared against every one of its references: a reference sub-tree counts
    as matched when an identical S-expression occurs among the candidate's
    sub-trees.

    Args:
        references: Sequence indexed in parallel with ``candidates``; entry
            ``i`` is an iterable of reference source strings for candidate
            ``i``.
        candidates: Sequence of candidate source strings.
        lang: Grammar name to load from ``parser/my-languages.so``; also
            passed to the comment stripper.

    Returns:
        Matched reference sub-trees divided by the total number of reference
        sub-trees, or ``0.0`` when there is nothing to compare.
    """
    if len(candidates) == 0:
        # Nothing to score, so skip loading the grammar library altogether.
        return 0.0

    # os.path.join keeps the path relative when root_dir is an empty string,
    # instead of producing the absolute "/parser/my-languages.so".
    grammar = Language(os.path.join(root_dir, 'parser', 'my-languages.so'), lang)
    parser = Parser()
    parser.set_language(grammar)

    match_count = 0
    total_count = 0

    for index, candidate in enumerate(candidates):
        references_sample = references[index]

        # Strip comments using the language actually being parsed (previously
        # always 'java'), and parse the candidate once rather than once per
        # reference.
        candidate = _strip_comments_or_keep(candidate, lang)
        candidate_tree = parser.parse(bytes(candidate, 'utf8')).root_node
        # A set makes each membership test O(1); the counts are unchanged.
        cand_sexps = set(_collect_subtree_sexps(candidate_tree))

        for reference in references_sample:
            reference = _strip_comments_or_keep(reference, lang)
            reference_tree = parser.parse(bytes(reference, 'utf8')).root_node
            ref_sexps = _collect_subtree_sexps(reference_tree)

            match_count += sum(1 for sexp in ref_sexps if sexp in cand_sexps)
            total_count += len(ref_sexps)

    if total_count == 0:
        # Every candidate had an empty reference list; avoid dividing by zero.
        return 0.0
    return match_count / total_count
|
|