@article{Saifullah-2019-Learning,
title = "Learning from Examples to Find Fully Qualified Names of API Elements in Code Snippets",
author = "Saifullah, C. M. Khaled and
Asaduzzaman, Muhammad and
Roy, Chanchal K.",
journal = "2019 34th IEEE/ACM International Conference on Automated Software Engineering (ASE)",
year = "2019",
publisher = "IEEE",
url = "https://gwf-uwaterloo.github.io/gwf-publications/G19-130002",
doi = "10.1109/ase.2019.00032",
abstract = "Developers often reuse code snippets from online forums, such as Stack Overflow, to learn API usages of software frameworks or libraries. These code snippets often contain ambiguous undeclared external references. Such external references make it difficult to learn and use those APIs correctly. In particular, reusing code snippets containing such ambiguous undeclared external references requires significant manual efforts and expertise to resolve them. Manually resolving fully qualified names (FQN) of API elements is a non-trivial task. In this paper, we propose a novel context-sensitive technique, called COSTER, to resolve FQNs of API elements in such code snippets. The proposed technique collects locally specific source code elements as well as globally related tokens as the context of FQNs, calculates likelihood scores, and builds an occurrence likelihood dictionary (OLD). Given an API element as a query, COSTER captures the context of the query API element, matches that with the FQNs of API elements stored in the OLD, and ranks those matched FQNs leveraging three different scores: likelihood, context similarity, and name similarity scores. Evaluation with more than 600K code examples collected from GitHub and two different Stack Overflow datasets shows that our proposed technique improves precision by 4-6{\%} and recall by 3-22{\%} compared to state-of-the-art techniques. The proposed technique significantly reduces the training time compared to the StatType, a state-of-the-art technique, without sacrificing accuracy. Extensive analyses on results demonstrate the robustness of the proposed technique.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="Saifullah-2019-Learning">
<titleInfo>
<title>Learning from Examples to Find Fully Qualified Names of API Elements in Code Snippets</title>
</titleInfo>
<name type="personal">
<namePart type="given">C</namePart>
<namePart type="given">M</namePart>
<namePart type="given">Khaled</namePart>
<namePart type="family">Saifullah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Asaduzzaman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chanchal</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Roy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>2019 34th IEEE/ACM International Conference on Automated Software Engineering (ASE)</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>IEEE</publisher>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Developers often reuse code snippets from online forums, such as Stack Overflow, to learn API usages of software frameworks or libraries. These code snippets often contain ambiguous undeclared external references. Such external references make it difficult to learn and use those APIs correctly. In particular, reusing code snippets containing such ambiguous undeclared external references requires significant manual efforts and expertise to resolve them. Manually resolving fully qualified names (FQN) of API elements is a non-trivial task. In this paper, we propose a novel context-sensitive technique, called COSTER, to resolve FQNs of API elements in such code snippets. The proposed technique collects locally specific source code elements as well as globally related tokens as the context of FQNs, calculates likelihood scores, and builds an occurrence likelihood dictionary (OLD). Given an API element as a query, COSTER captures the context of the query API element, matches that with the FQNs of API elements stored in the OLD, and ranks those matched FQNs leveraging three different scores: likelihood, context similarity, and name similarity scores. Evaluation with more than 600K code examples collected from GitHub and two different Stack Overflow datasets shows that our proposed technique improves precision by 4-6% and recall by 3-22% compared to state-of-the-art techniques. The proposed technique significantly reduces the training time compared to the StatType, a state-of-the-art technique, without sacrificing accuracy. Extensive analyses on results demonstrate the robustness of the proposed technique.</abstract>
<identifier type="citekey">Saifullah-2019-Learning</identifier>
<identifier type="doi">10.1109/ase.2019.00032</identifier>
<location>
<url>https://gwf-uwaterloo.github.io/gwf-publications/G19-130002</url>
</location>
<part>
<date>2019</date>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Learning from Examples to Find Fully Qualified Names of API Elements in Code Snippets
%A Saifullah, C. M. Khaled
%A Asaduzzaman, Muhammad
%A Roy, Chanchal K.
%J 2019 34th IEEE/ACM International Conference on Automated Software Engineering (ASE)
%D 2019
%I IEEE
%F Saifullah-2019-Learning
%X Developers often reuse code snippets from online forums, such as Stack Overflow, to learn API usages of software frameworks or libraries. These code snippets often contain ambiguous undeclared external references. Such external references make it difficult to learn and use those APIs correctly. In particular, reusing code snippets containing such ambiguous undeclared external references requires significant manual efforts and expertise to resolve them. Manually resolving fully qualified names (FQN) of API elements is a non-trivial task. In this paper, we propose a novel context-sensitive technique, called COSTER, to resolve FQNs of API elements in such code snippets. The proposed technique collects locally specific source code elements as well as globally related tokens as the context of FQNs, calculates likelihood scores, and builds an occurrence likelihood dictionary (OLD). Given an API element as a query, COSTER captures the context of the query API element, matches that with the FQNs of API elements stored in the OLD, and ranks those matched FQNs leveraging three different scores: likelihood, context similarity, and name similarity scores. Evaluation with more than 600K code examples collected from GitHub and two different Stack Overflow datasets shows that our proposed technique improves precision by 4-6% and recall by 3-22% compared to state-of-the-art techniques. The proposed technique significantly reduces the training time compared to the StatType, a state-of-the-art technique, without sacrificing accuracy. Extensive analyses on results demonstrate the robustness of the proposed technique.
%R 10.1109/ase.2019.00032
%U https://gwf-uwaterloo.github.io/gwf-publications/G19-130002
%U https://doi.org/10.1109/ase.2019.00032
Markdown (Informal)
[Learning from Examples to Find Fully Qualified Names of API Elements in Code Snippets](https://gwf-uwaterloo.github.io/gwf-publications/G19-130002) (Saifullah et al., GWF 2019)
ACL
- C. M. Khaled Saifullah, Muhammad Asaduzzaman, and Chanchal K. Roy. 2019. Learning from Examples to Find Fully Qualified Names of API Elements in Code Snippets. 2019 34th IEEE/ACM International Conference on Automated Software Engineering (ASE).