@article{daSilva-2020-CROKAGE,
title = "CROKAGE: effective solution recommendation for programming tasks by leveraging crowd knowledge",
author = "Silva, Rodrigo Fernandes Gomes da and
Roy, Chanchal K. and
Rahman, Mohammad Masudur and
Schneider, Kevin A. and
Paix{\~a}o, Kl{\'e}risson V. R. and
Dantas, Carlos Eduardo de Carvalho and
Maia, Marcelo de Almeida",
journal = "Empirical Software Engineering, Volume 25, Issue 6",
volume = "25",
number = "6",
year = "2020",
publisher = "Springer Science and Business Media LLC",
url = "https://gwf-uwaterloo.github.io/gwf-publications/G20-16001",
doi = "10.1007/s10664-020-09863-2",
pages = "4707--4758",
abstract = "Developers often search for relevant code examples on the web for their programming tasks. Unfortunately, they face three major problems. First, they frequently need to read and analyse multiple results from the search engines to obtain a satisfactory solution. Second, the search is impaired due to a lexical gap between the query (task description) and the information associated with the solution (e.g., code example). Third, the retrieved solution may not be comprehensible, i.e., the code segment might miss a succinct explanation. To address these three problems, we propose CROKAGE (CrowdKnowledge Answer Generator), a tool that takes the description of a programming task (the query) as input and delivers a comprehensible solution for the task. Our solutions contain not only relevant code examples but also their succinct explanations written by human developers. The search for code examples is modeled as an Information Retrieval (IR) problem. We first leverage the crowd knowledge stored in Stack Overflow to retrieve the candidate answers against a programming task. For this, we use a fine-tuned IR technique, chosen after comparing 11 IR techniques in terms of performance. Then we use a multi-factor relevance mechanism to mitigate the lexical gap problem, and select the top quality answers related to the task. Finally, we perform natural language processing on the top quality answers and deliver the comprehensible solutions containing both code examples and code explanations unlike earlier studies. We evaluate and compare our approach against ten baselines, including the state-of-art. We show that CROKAGE outperforms the ten baselines in suggesting relevant solutions for 902 programming tasks (i.e., queries) of three popular programming languages: Java, Python and PHP. Furthermore, we use 24 programming tasks (queries) to evaluate our solutions with 29 developers and confirm that CROKAGE outperforms the state-of-art tool in terms of relevance of the suggested code examples, benefit of the code explanations and the overall solution quality (code + explanation).",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="daSilva-2020-CROKAGE:">
<titleInfo>
<title>CROKAGE: effective solution recommendation for programming tasks by leveraging crowd knowledge</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rodrigo</namePart>
<namePart type="given">Fernandes</namePart>
<namePart type="given">Gomes</namePart>
<namePart type="given">da</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chanchal</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Roy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Masudur</namePart>
<namePart type="family">Rahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Klérisson</namePart>
<namePart type="given">V</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Paixão</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="given">Eduardo</namePart>
<namePart type="given">de</namePart>
<namePart type="given">Carvalho</namePart>
<namePart type="family">Dantas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcelo</namePart>
<namePart type="given">de</namePart>
<namePart type="given">Almeida</namePart>
<namePart type="family">Maia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Empirical Software Engineering, Volume 25, Issue 6</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>Springer Science and Business Media LLC</publisher>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Developers often search for relevant code examples on the web for their programming tasks. Unfortunately, they face three major problems. First, they frequently need to read and analyse multiple results from the search engines to obtain a satisfactory solution. Second, the search is impaired due to a lexical gap between the query (task description) and the information associated with the solution (e.g., code example). Third, the retrieved solution may not be comprehensible, i.e., the code segment might miss a succinct explanation. To address these three problems, we propose CROKAGE (CrowdKnowledge Answer Generator), a tool that takes the description of a programming task (the query) as input and delivers a comprehensible solution for the task. Our solutions contain not only relevant code examples but also their succinct explanations written by human developers. The search for code examples is modeled as an Information Retrieval (IR) problem. We first leverage the crowd knowledge stored in Stack Overflow to retrieve the candidate answers against a programming task. For this, we use a fine-tuned IR technique, chosen after comparing 11 IR techniques in terms of performance. Then we use a multi-factor relevance mechanism to mitigate the lexical gap problem, and select the top quality answers related to the task. Finally, we perform natural language processing on the top quality answers and deliver the comprehensible solutions containing both code examples and code explanations unlike earlier studies. We evaluate and compare our approach against ten baselines, including the state-of-art. We show that CROKAGE outperforms the ten baselines in suggesting relevant solutions for 902 programming tasks (i.e., queries) of three popular programming languages: Java, Python and PHP. Furthermore, we use 24 programming tasks (queries) to evaluate our solutions with 29 developers and confirm that CROKAGE outperforms the state-of-art tool in terms of relevance of the suggested code examples, benefit of the code explanations and the overall solution quality (code + explanation).</abstract>
<identifier type="citekey">da Silva-2020-CROKAGE:</identifier>
<identifier type="doi">10.1007/s10664-020-09863-2</identifier>
<location>
<url>https://gwf-uwaterloo.github.io/gwf-publications/G20-16001</url>
</location>
<part>
<date>2020</date>
<detail type="volume"><number>25</number></detail>
<detail type="issue"><number>6</number></detail>
<extent unit="page">
<start>4707</start>
<end>4758</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T CROKAGE: effective solution recommendation for programming tasks by leveraging crowd knowledge
%A Silva, Rodrigo Fernandes Gomes da
%A Roy, Chanchal K.
%A Rahman, Mohammad Masudur
%A Schneider, Kevin A.
%A Paixão, Klérisson V. R.
%A Dantas, Carlos Eduardo de Carvalho
%A Maia, Marcelo de Almeida
%J Empirical Software Engineering
%D 2020
%V 25
%N 6
%I Springer Science and Business Media LLC
%F daSilva-2020-CROKAGE
%X Developers often search for relevant code examples on the web for their programming tasks. Unfortunately, they face three major problems. First, they frequently need to read and analyse multiple results from the search engines to obtain a satisfactory solution. Second, the search is impaired by a lexical gap between the query (task description) and the information associated with the solution (e.g., a code example). Third, the retrieved solution may not be comprehensible, i.e., the code segment might lack a succinct explanation. To address these three problems, we propose CROKAGE (CrowdKnowledge Answer Generator), a tool that takes the description of a programming task (the query) as input and delivers a comprehensible solution for the task. Our solutions contain not only relevant code examples but also their succinct explanations written by human developers. The search for code examples is modeled as an Information Retrieval (IR) problem. We first leverage the crowd knowledge stored in Stack Overflow to retrieve candidate answers for a programming task. For this, we use a fine-tuned IR technique, chosen after comparing 11 IR techniques in terms of performance. Then we use a multi-factor relevance mechanism to mitigate the lexical gap problem and select the top-quality answers related to the task. Finally, we perform natural language processing on the top-quality answers and, unlike earlier studies, deliver comprehensible solutions containing both code examples and code explanations. We evaluate and compare our approach against ten baselines, including the state of the art. We show that CROKAGE outperforms the ten baselines in suggesting relevant solutions for 902 programming tasks (i.e., queries) in three popular programming languages: Java, Python, and PHP. Furthermore, we use 24 programming tasks (queries) to evaluate our solutions with 29 developers and confirm that CROKAGE outperforms the state-of-the-art tool in terms of the relevance of the suggested code examples, the benefit of the code explanations, and the overall solution quality (code + explanation).
%R 10.1007/s10664-020-09863-2
%U https://gwf-uwaterloo.github.io/gwf-publications/G20-16001
%U https://doi.org/10.1007/s10664-020-09863-2
%P 4707-4758
Markdown (Informal)
[CROKAGE: effective solution recommendation for programming tasks by leveraging crowd knowledge](https://gwf-uwaterloo.github.io/gwf-publications/G20-16001) (Silva et al., GWF 2020)
ACL
- Rodrigo Fernandes Gomes da Silva, Chanchal K. Roy, Mohammad Masudur Rahman, Kevin A. Schneider, Klérisson V. R. Paixão, Carlos Eduardo de Carvalho Dantas, and Marcelo de Almeida Maia. 2020. CROKAGE: effective solution recommendation for programming tasks by leveraging crowd knowledge. Empirical Software Engineering, 25(6):4707–4758.
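
The abstract frames candidate-answer retrieval as an Information Retrieval problem over Stack Overflow answers. As a rough, hedged illustration of that framing only: the paper's fine-tuned IR technique and multi-factor relevance mechanism are not reproduced here, and the corpus, query, and parameters below are hypothetical stand-ins. The sketch uses a plain BM25 scorer, a common IR baseline, to rank toy answer texts against a task description.

```python
# Minimal sketch: BM25-style ranking of Stack Overflow-like answer texts
# against a programming-task query. Illustrative only; NOT the CROKAGE
# implementation. Corpus, query, and k1/b values are hypothetical.
import math
from collections import Counter

def bm25_scores(query, docs, k1=1.2, b=0.75):
    """Return one BM25 score per tokenized doc for the tokenized query."""
    n = len(docs)
    avgdl = sum(len(d) for d in docs) / n  # average document length
    # Document frequency of each distinct query term.
    df = {t: sum(1 for d in docs if t in d) for t in set(query)}
    scores = []
    for d in docs:
        tf = Counter(d)  # term frequencies within this doc
        s = 0.0
        for t in query:
            if df[t] == 0:
                continue  # term absent from the corpus contributes nothing
            idf = math.log((n - df[t] + 0.5) / (df[t] + 0.5) + 1.0)
            norm = tf[t] + k1 * (1 - b + b * len(d) / avgdl)
            s += idf * tf[t] * (k1 + 1) / norm
        scores.append(s)
    return scores

# Toy "crowd knowledge": pre-tokenized answer texts.
answers = [
    "read a text file line by line in java using bufferedreader".split(),
    "parse json in python with the json module".split(),
    "connect to mysql from php using pdo".split(),
]
query = "how to read a file line by line in java".split()

scores = bm25_scores(query, answers)
best = max(range(len(answers)), key=scores.__getitem__)
print(f"top answer ({scores[best]:.2f}):", " ".join(answers[best]))
```

In the paper itself, this lexical ranking stage is only the first step; the reported system then applies a multi-factor relevance mechanism and natural-language processing to pair the selected code examples with succinct explanations.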