@article{Nafi-2018-[Research,
title = "[Research Paper] CroLSim: Cross Language Software Similarity Detector Using API Documentation",
author = "Nafi, Kawser Wazed and
Roy, Banani and
Roy, Chanchal K. and
Schneider, Kevin A.",
journal = "2018 IEEE 18th International Working Conference on Source Code Analysis and Manipulation (SCAM)",
year = "2018",
publisher = "IEEE",
url = "https://gwf-uwaterloo.github.io/gwf-publications/G18-55003",
doi = "10.1109/scam.2018.00023",
abstract = "In today's open source era, developers look for similar software applications in source code repositories for a number of reasons, including, exploring alternative implementations, reusing source code, or looking for a better application. However, while there are a great many studies for finding similar applications written in the same programming language, there is a marked lack of studies for finding similar software applications written in different languages. In this paper, we fill the gap by proposing a novel model CroLSim which is able to detect similar software applications across different programming languages. In our approach, we use the API documentation to find relationships among the API calls used by the different programming languages. We adopt a deep learning based word-vector learning method to identify semantic relationships among the API documentation which we then use to detect cross-language similar software applications. For evaluating CroLSim, we formed a repository consisting of 8,956 Java, 7,658 C{\#}, and 10,232 Python applications collected from GitHub. We observed that CroLSim can successfully detect similar software applications across different programming languages with a mean average precision rate of 0.65, an average confidence rate of 3.6 (out of 5) with 75{\%} high rated successful queries, which outperforms all related existing approaches with a significant performance improvement.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="Nafi-2018-[Research">
<titleInfo>
<title>[Research Paper] CroLSim: Cross Language Software Similarity Detector Using API Documentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kawser</namePart>
<namePart type="given">Wazed</namePart>
<namePart type="family">Nafi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Banani</namePart>
<namePart type="family">Roy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chanchal</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Roy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>2018 IEEE 18th International Working Conference on Source Code Analysis and Manipulation (SCAM)</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>IEEE</publisher>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>In today’s open source era, developers look for similar software applications in source code repositories for a number of reasons, including, exploring alternative implementations, reusing source code, or looking for a better application. However, while there are a great many studies for finding similar applications written in the same programming language, there is a marked lack of studies for finding similar software applications written in different languages. In this paper, we fill the gap by proposing a novel model CroLSim which is able to detect similar software applications across different programming languages. In our approach, we use the API documentation to find relationships among the API calls used by the different programming languages. We adopt a deep learning based word-vector learning method to identify semantic relationships among the API documentation which we then use to detect cross-language similar software applications. For evaluating CroLSim, we formed a repository consisting of 8,956 Java, 7,658 C#, and 10,232 Python applications collected from GitHub. We observed that CroLSim can successfully detect similar software applications across different programming languages with a mean average precision rate of 0.65, an average confidence rate of 3.6 (out of 5) with 75% high rated successful queries, which outperforms all related existing approaches with a significant performance improvement.</abstract>
<identifier type="citekey">Nafi-2018-[Research</identifier>
<identifier type="doi">10.1109/scam.2018.00023</identifier>
<location>
<url>https://gwf-uwaterloo.github.io/gwf-publications/G18-55003</url>
</location>
<part>
<date>2018</date>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T [Research Paper] CroLSim: Cross Language Software Similarity Detector Using API Documentation
%A Nafi, Kawser Wazed
%A Roy, Banani
%A Roy, Chanchal K.
%A Schneider, Kevin A.
%J 2018 IEEE 18th International Working Conference on Source Code Analysis and Manipulation (SCAM)
%D 2018
%I IEEE
%F Nafi-2018-[Research
%X In today’s open source era, developers look for similar software applications in source code repositories for a number of reasons, including, exploring alternative implementations, reusing source code, or looking for a better application. However, while there are a great many studies for finding similar applications written in the same programming language, there is a marked lack of studies for finding similar software applications written in different languages. In this paper, we fill the gap by proposing a novel model CroLSim which is able to detect similar software applications across different programming languages. In our approach, we use the API documentation to find relationships among the API calls used by the different programming languages. We adopt a deep learning based word-vector learning method to identify semantic relationships among the API documentation which we then use to detect cross-language similar software applications. For evaluating CroLSim, we formed a repository consisting of 8,956 Java, 7,658 C#, and 10,232 Python applications collected from GitHub. We observed that CroLSim can successfully detect similar software applications across different programming languages with a mean average precision rate of 0.65, an average confidence rate of 3.6 (out of 5) with 75% high rated successful queries, which outperforms all related existing approaches with a significant performance improvement.
%R 10.1109/scam.2018.00023
%U https://gwf-uwaterloo.github.io/gwf-publications/G18-55003
%U https://doi.org/10.1109/scam.2018.00023
Markdown (Informal)
[[Research Paper] CroLSim: Cross Language Software Similarity Detector Using API Documentation](https://gwf-uwaterloo.github.io/gwf-publications/G18-55003) (Nafi et al., GWF 2018)
ACL
- Kawser Wazed Nafi, Banani Roy, Chanchal K. Roy, and Kevin A. Schneider. 2018. [Research Paper] CroLSim: Cross Language Software Similarity Detector Using API Documentation. 2018 IEEE 18th International Working Conference on Source Code Analysis and Manipulation (SCAM).