@inproceedings{Bhattacharjee-Roy-2022-Supporting-Readability,
title = "Supporting Readability by Comprehending the Hierarchical Abstraction of a Software Project",
author = "Bhattacharjee, Avijit and
Roy, Banani and
Schneider, Kevin A.",
booktitle = "15th Innovations in Software Engineering Conference",
year = "2022",
publisher = "ACM",
url = "https://gwf-uwaterloo.github.io/gwf-publications/G22-9001",
doi = "10.1145/3511430.3511441",
abstract = "Exploring the source code of a software system is a prevailing task that is frequently done by contributors to a system. Practitioners often use call graphs to aid in understanding the source code of an inadequately documented software system. Call graphs, when visualized, show caller and callee relationships between functions. A static call graph provides an overall structure of a software system and dynamic call graphs generated from dynamic execution logs can be used to trace program behaviour for a particular scenario. Unfortunately, a call graph of an entire system can be very complicated and hard to understand. Hierarchically abstracting a call graph can be used to summarize an entire system{'}s structure and more easily comprehend function calls. In this work, we mine concepts from source code entities (functions) to generate a concept cluster tree with improved naming of cluster nodes to complement existing studies and facilitate more effective program comprehension for developers. We apply three different information retrieval techniques (TFIDF, LDA, and LSI) on function names and function name variants to label the nodes of a concept cluster tree generated by clustering execution paths. From our experiment in comparing automatic labelling with manual labelling by participants for 12 use cases, we found that among the techniques on average, TFIDF performs better with 64{\%} matching. LDA and LSI had 37{\%} and 23{\%} matching respectively. In addition, using the words in function name variants performed at least 5{\%} better in participant ratings for all three techniques on average for the use cases.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="Bhattacharjee-Roy-2022-Supporting-Readability">
<titleInfo>
<title>Supporting Readability by Comprehending the Hierarchical Abstraction of a Software Project</title>
</titleInfo>
<name type="personal">
<namePart type="given">Avijit</namePart>
<namePart type="family">Bhattacharjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Banani</namePart>
<namePart type="family">Roy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="marcgt">conference publication</genre>
<relatedItem type="host">
<titleInfo>
<title>15th Innovations in Software Engineering Conference</title>
</titleInfo>
<originInfo>
<issuance>monographic</issuance>
<publisher>ACM</publisher>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Exploring the source code of a software system is a prevailing task that is frequently done by contributors to a system. Practitioners often use call graphs to aid in understanding the source code of an inadequately documented software system. Call graphs, when visualized, show caller and callee relationships between functions. A static call graph provides an overall structure of a software system and dynamic call graphs generated from dynamic execution logs can be used to trace program behaviour for a particular scenario. Unfortunately, a call graph of an entire system can be very complicated and hard to understand. Hierarchically abstracting a call graph can be used to summarize an entire system’s structure and more easily comprehend function calls. In this work, we mine concepts from source code entities (functions) to generate a concept cluster tree with improved naming of cluster nodes to complement existing studies and facilitate more effective program comprehension for developers. We apply three different information retrieval techniques (TFIDF, LDA, and LSI) on function names and function name variants to label the nodes of a concept cluster tree generated by clustering execution paths. From our experiment in comparing automatic labelling with manual labelling by participants for 12 use cases, we found that among the techniques on average, TFIDF performs better with 64% matching. LDA and LSI had 37% and 23% matching respectively. In addition, using the words in function name variants performed at least 5% better in participant ratings for all three techniques on average for the use cases.</abstract>
<identifier type="citekey">Bhattacharjee-Roy-2022-Supporting-Readability</identifier>
<identifier type="doi">10.1145/3511430.3511441</identifier>
<location>
<url>https://gwf-uwaterloo.github.io/gwf-publications/G22-9001</url>
</location>
<part>
<date>2022</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Supporting Readability by Comprehending the Hierarchical Abstraction of a Software Project
%A Bhattacharjee, Avijit
%A Roy, Banani
%A Schneider, Kevin A.
%B 15th Innovations in Software Engineering Conference
%D 2022
%I ACM
%F Bhattacharjee-Roy-2022-Supporting-Readability
%X Exploring the source code of a software system is a prevailing task that is frequently done by contributors to a system. Practitioners often use call graphs to aid in understanding the source code of an inadequately documented software system. Call graphs, when visualized, show caller and callee relationships between functions. A static call graph provides an overall structure of a software system and dynamic call graphs generated from dynamic execution logs can be used to trace program behaviour for a particular scenario. Unfortunately, a call graph of an entire system can be very complicated and hard to understand. Hierarchically abstracting a call graph can be used to summarize an entire system’s structure and more easily comprehend function calls. In this work, we mine concepts from source code entities (functions) to generate a concept cluster tree with improved naming of cluster nodes to complement existing studies and facilitate more effective program comprehension for developers. We apply three different information retrieval techniques (TFIDF, LDA, and LSI) on function names and function name variants to label the nodes of a concept cluster tree generated by clustering execution paths. From our experiment in comparing automatic labelling with manual labelling by participants for 12 use cases, we found that among the techniques on average, TFIDF performs better with 64% matching. LDA and LSI had 37% and 23% matching respectively. In addition, using the words in function name variants performed at least 5% better in participant ratings for all three techniques on average for the use cases.
%R 10.1145/3511430.3511441
%U https://gwf-uwaterloo.github.io/gwf-publications/G22-9001
%U https://doi.org/10.1145/3511430.3511441
Markdown (Informal)
[Supporting Readability by Comprehending the Hierarchical Abstraction of a Software Project](https://gwf-uwaterloo.github.io/gwf-publications/G22-9001) (Bhattacharjee et al., GWF 2022)
ACL
- Avijit Bhattacharjee, Banani Roy, and Kevin A. Schneider. 2022. Supporting Readability by Comprehending the Hierarchical Abstraction of a Software Project. 15th Innovations in Software Engineering Conference.