@article{Wang-2018-CCAligner,
title = "CCAligner",
author = "Wang, Pengcheng and
Svajlenko, Jeffrey and
Wu, Yanzhao and
Xu, Yun and
Roy, Chanchal K.",
journal = "Proceedings of the 40th International Conference on Software Engineering",
year = "2018",
publisher = "ACM",
url = "https://gwf-uwaterloo.github.io/gwf-publications/G18-6001",
doi = "10.1145/3180155.3180179",
abstract = "Copying code and then pasting with large number of edits is a common activity in software development, and the pasted code is a kind of complicated Type-3 clone. Due to large number of edits, we consider the clone as a large-gap clone. Large-gap clone can reflect the extension of code, such as change and improvement. The existing state-of-the-art clone detectors suffer from several limitations in detecting large-gap clones. In this paper, we propose a tool, CCAligner, using code window that considers e edit distance for matching to detect large-gap clones. In our approach, a novel e-mismatch index is designed and the asymmetric similarity coefficient is used for similarity measure. We thoroughly evaluate CCAligner both for large-gap clone detection, and for general Type-1, Type-2 and Type-3 clone detection. The results show that CCAligner performs better than other competing tools in large-gap clone detection, and has the best execution time for 10MLOC input with good precision and recall in general Type-1 to Type-3 clone detection. Compared with existing state-of-the-art tools, CCAligner is the best performing large-gap clone detection tool, and remains competitive with the best clone detectors in general Type-1, Type-2 and Type-3 clone detection.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="Wang-2018-CCAligner">
<titleInfo>
<title>CCAligner</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pengcheng</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeffrey</namePart>
<namePart type="family">Svajlenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanzhao</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chanchal</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Roy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 40th International Conference on Software Engineering</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>ACM</publisher>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Copying code and then pasting with large number of edits is a common activity in software development, and the pasted code is a kind of complicated Type-3 clone. Due to large number of edits, we consider the clone as a large-gap clone. Large-gap clone can reflect the extension of code, such as change and improvement. The existing state-of-the-art clone detectors suffer from several limitations in detecting large-gap clones. In this paper, we propose a tool, CCAligner, using code window that considers e edit distance for matching to detect large-gap clones. In our approach, a novel e-mismatch index is designed and the asymmetric similarity coefficient is used for similarity measure. We thoroughly evaluate CCAligner both for large-gap clone detection, and for general Type-1, Type-2 and Type-3 clone detection. The results show that CCAligner performs better than other competing tools in large-gap clone detection, and has the best execution time for 10MLOC input with good precision and recall in general Type-1 to Type-3 clone detection. Compared with existing state-of-the-art tools, CCAligner is the best performing large-gap clone detection tool, and remains competitive with the best clone detectors in general Type-1, Type-2 and Type-3 clone detection.</abstract>
<identifier type="citekey">Wang-2018-CCAligner</identifier>
<identifier type="doi">10.1145/3180155.3180179</identifier>
<location>
<url>https://gwf-uwaterloo.github.io/gwf-publications/G18-6001</url>
</location>
<part>
<date>2018</date>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T CCAligner
%A Wang, Pengcheng
%A Svajlenko, Jeffrey
%A Wu, Yanzhao
%A Xu, Yun
%A Roy, Chanchal K.
%J Proceedings of the 40th International Conference on Software Engineering
%D 2018
%I ACM
%F Wang-2018-CCAligner
%X Copying code and then pasting with large number of edits is a common activity in software development, and the pasted code is a kind of complicated Type-3 clone. Due to large number of edits, we consider the clone as a large-gap clone. Large-gap clone can reflect the extension of code, such as change and improvement. The existing state-of-the-art clone detectors suffer from several limitations in detecting large-gap clones. In this paper, we propose a tool, CCAligner, using code window that considers e edit distance for matching to detect large-gap clones. In our approach, a novel e-mismatch index is designed and the asymmetric similarity coefficient is used for similarity measure. We thoroughly evaluate CCAligner both for large-gap clone detection, and for general Type-1, Type-2 and Type-3 clone detection. The results show that CCAligner performs better than other competing tools in large-gap clone detection, and has the best execution time for 10MLOC input with good precision and recall in general Type-1 to Type-3 clone detection. Compared with existing state-of-the-art tools, CCAligner is the best performing large-gap clone detection tool, and remains competitive with the best clone detectors in general Type-1, Type-2 and Type-3 clone detection.
%R 10.1145/3180155.3180179
%U https://gwf-uwaterloo.github.io/gwf-publications/G18-6001
%U https://doi.org/10.1145/3180155.3180179
Markdown (Informal)
[CCAligner](https://gwf-uwaterloo.github.io/gwf-publications/G18-6001) (Wang et al., GWF 2018)
ACL
- Pengcheng Wang, Jeffrey Svajlenko, Yanzhao Wu, Yun Xu, and Chanchal K. Roy. 2018. CCAligner. Proceedings of the 40th International Conference on Software Engineering.