% Journal article record. The journal field holds only the journal name;
% volume and issue live in their own fields so the bibliography style formats them.
% The chemical formula in the title is brace-protected math so styles keep its case and subscript.
@article{Liaw-2021-Explaining,
    title = "Explaining the Shortcomings of Log-Transforming the Dependent Variable in Regression Models and Recommending a Better Alternative: Evidence From Soil {CO$_2$} Emission Studies",
    author = "Liaw, Kao-Lee and
      Khomik, Myroslava and
      Arain, M. Altaf",
    journal = "Journal of Geophysical Research: Biogeosciences",
    volume = "126",
    number = "5",
    year = "2021",
    publisher = "American Geophysical Union (AGU)",
    url = "https://gwf-uwaterloo.github.io/gwf-publications/G21-96002",
    doi = "10.1029/2021jg006238",
    abstract = "Log-transforming the dependent variable of a regression model, though convenient and frequently used, is accompanied by an under-prediction problem. We found that this underprediction can reach up to 20{\%}, which is significant in studies that aim to estimate annual budgets. The fundamental reason for this problem is simply that the log-function is concave, and it has nothing to do with whether the dependent variable has a log-normal distribution or not. Using field-observed data of soil CO2 emission, soil temperature and soil moisture in a saturated-specification of a regression model for predicting emissions, we revealed that the under-predictions of the log-transformed approach were pervasive and systematically biased. The key determinant of the problem's severity was the coefficient of variation in the dependent variable that differed among different combinations of the values of the explanatory factors. By applying a parsimonious (Gaussian-Gamma) specification of the regression model to data from four different ecosystems, we found that this under-prediction problem was serious to various extents, and that for a relatively weak explanatory factor, the log-transformed approach is prone to yield a physically nonsensical estimated coefficient. Finally, we showed and concluded that the problem can be avoided by switching to the nonlinear approach, which does not require the assumption of homoscedasticity for the error term in computing the standard errors of the estimated coefficients.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="Liaw-2021-Explaining">
<titleInfo>
<title>Explaining the Shortcomings of Log‐Transforming the Dependent Variable in Regression Models and Recommending a Better Alternative: Evidence From Soil CO₂ Emission Studies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kao‐Lee</namePart>
<namePart type="family">Liaw</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Myroslava</namePart>
<namePart type="family">Khomik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">M</namePart>
<namePart type="given">Altaf</namePart>
<namePart type="family">Arain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Journal of Geophysical Research: Biogeosciences</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>American Geophysical Union (AGU)</publisher>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Log-transforming the dependent variable of a regression model, though convenient and frequently used, is accompanied by an under-prediction problem. We found that this underprediction can reach up to 20%, which is significant in studies that aim to estimate annual budgets. The fundamental reason for this problem is simply that the log-function is concave, and it has nothing to do with whether the dependent variable has a log-normal distribution or not. Using field-observed data of soil CO2 emission, soil temperature and soil moisture in a saturated-specification of a regression model for predicting emissions, we revealed that the under-predictions of the log-transformed approach were pervasive and systematically biased. The key determinant of the problem’s severity was the coefficient of variation in the dependent variable that differed among different combinations of the values of the explanatory factors. By applying a parsimonious (Gaussian-Gamma) specification of the regression model to data from four different ecosystems, we found that this under-prediction problem was serious to various extents, and that for a relatively weak explanatory factor, the log-transformed approach is prone to yield a physically nonsensical estimated coefficient. Finally, we showed and concluded that the problem can be avoided by switching to the nonlinear approach, which does not require the assumption of homoscedasticity for the error term in computing the standard errors of the estimated coefficients.</abstract>
<identifier type="citekey">Liaw-2021-Explaining</identifier>
<identifier type="doi">10.1029/2021jg006238</identifier>
<location>
<url>https://gwf-uwaterloo.github.io/gwf-publications/G21-96002</url>
</location>
<part>
<date>2021</date>
<detail type="volume"><number>126</number></detail>
<detail type="issue"><number>5</number></detail>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Explaining the Shortcomings of Log‐Transforming the Dependent Variable in Regression Models and Recommending a Better Alternative: Evidence From Soil CO₂ Emission Studies
%A Liaw, Kao‐Lee
%A Khomik, Myroslava
%A Arain, M. Altaf
%J Journal of Geophysical Research: Biogeosciences
%D 2021
%V 126
%N 5
%I American Geophysical Union (AGU)
%F Liaw-2021-Explaining
%X Log-transforming the dependent variable of a regression model, though convenient and frequently used, is accompanied by an under-prediction problem. We found that this underprediction can reach up to 20%, which is significant in studies that aim to estimate annual budgets. The fundamental reason for this problem is simply that the log-function is concave, and it has nothing to do with whether the dependent variable has a log-normal distribution or not. Using field-observed data of soil CO2 emission, soil temperature and soil moisture in a saturated-specification of a regression model for predicting emissions, we revealed that the under-predictions of the log-transformed approach were pervasive and systematically biased. The key determinant of the problem’s severity was the coefficient of variation in the dependent variable that differed among different combinations of the values of the explanatory factors. By applying a parsimonious (Gaussian-Gamma) specification of the regression model to data from four different ecosystems, we found that this under-prediction problem was serious to various extents, and that for a relatively weak explanatory factor, the log-transformed approach is prone to yield a physically nonsensical estimated coefficient. Finally, we showed and concluded that the problem can be avoided by switching to the nonlinear approach, which does not require the assumption of homoscedasticity for the error term in computing the standard errors of the estimated coefficients.
%R 10.1029/2021jg006238
%U https://gwf-uwaterloo.github.io/gwf-publications/G21-96002
%U https://doi.org/10.1029/2021jg006238
Markdown (Informal)
[Explaining the Shortcomings of Log‐Transforming the Dependent Variable in Regression Models and Recommending a Better Alternative: Evidence From Soil CO<sub>2</sub> Emission Studies](https://gwf-uwaterloo.github.io/gwf-publications/G21-96002) (Liaw et al., GWF 2021)
ACL
- Kao‐Lee Liaw, Myroslava Khomik, and M. Altaf Arain. 2021. Explaining the Shortcomings of Log‐Transforming the Dependent Variable in Regression Models and Recommending a Better Alternative: Evidence From Soil CO₂ Emission Studies. Journal of Geophysical Research: Biogeosciences, 126(5).