% Journal article record, citation key Mizukami-2019-On.
% The journal field holds only the periodical name; volume and issue live in
% their own fields so that bibliography styles do not print them twice.
@article{Mizukami-2019-On,
  title     = {On the choice of calibration metrics for {``}high-flow{''} estimation using hydrologic models},
  author    = {Mizukami, Naoki and
               Rakovec, Old{\v{r}}ich and
               Newman, Andrew J. and
               Clark, Martyn P. and
               Wood, A. W. and
               Gupta, Hoshin and
               Kumar, Rohini},
  journal   = {Hydrology and Earth System Sciences},
  volume    = {23},
  number    = {6},
  pages     = {2601--2614},
  year      = {2019},
  publisher = {Copernicus GmbH},
  doi       = {10.5194/hess-23-2601-2019},
  url       = {https://gwf-uwaterloo.github.io/gwf-publications/G19-119001},
  abstract  = {Calibration is an essential step for improving the accuracy of simulations generated using hydrologic models. A key modeling decision is selecting the performance metric to be optimized. It has been common to use squared error performance metrics, or normalized variants such as Nash{--}Sutcliffe efficiency (NSE), based on the idea that their squared-error nature will emphasize the estimates of high flows. However, we conclude that NSE-based model calibrations actually result in poor reproduction of high-flow events, such as the annual peak flows that are used for flood frequency estimation. Using three different types of performance metrics, we calibrate two hydrological models at a daily step, the Variable Infiltration Capacity (VIC) model and the mesoscale Hydrologic Model (mHM), and evaluate their ability to simulate high-flow events for 492 basins throughout the contiguous United States. The metrics investigated are (1) NSE, (2) Kling{--}Gupta efficiency (KGE) and its variants, and (3) annual peak flow bias (APFB), where the latter is an application-specific metric that focuses on annual peak flows. As expected, the APFB metric produces the best annual peak flow estimates; however, performance on other high-flow-related metrics is poor. In contrast, the use of NSE results in annual peak flow estimates that are more than 20 {\%} worse, primarily due to the tendency of NSE to underestimate observed flow variability. On the other hand, the use of KGE results in annual peak flow estimates that are better than from NSE, owing to improved flow time series metrics (mean and variance), with only a slight degradation in performance with respect to other related metrics, particularly when a non-standard weighting of the components of KGE is used. Stochastically generated ensemble simulations based on model residuals show the ability to improve the high-flow metrics, regardless of the deterministic performances. However, we emphasize that improving the fidelity of streamflow dynamics from deterministically calibrated models is still important, as it may improve high-flow metrics (for the right reasons). Overall, this work highlights the need for a deeper understanding of performance metric behavior and design in relation to the desired goals of model calibration.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="Mizukami-2019-On">
<titleInfo>
<title>On the choice of calibration metrics for “high-flow” estimation using hydrologic models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Naoki</namePart>
<namePart type="family">Mizukami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oldřich</namePart>
<namePart type="family">Rakovec</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="given">J</namePart>
<namePart type="family">Newman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martyn</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Clark</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">W</namePart>
<namePart type="family">Wood</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hoshin</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rohini</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Hydrology and Earth System Sciences</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>Copernicus GmbH</publisher>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Calibration is an essential step for improving the accuracy of simulations generated using hydrologic models. A key modeling decision is selecting the performance metric to be optimized. It has been common to use squared error performance metrics, or normalized variants such as Nash–Sutcliffe efficiency (NSE), based on the idea that their squared-error nature will emphasize the estimates of high flows. However, we conclude that NSE-based model calibrations actually result in poor reproduction of high-flow events, such as the annual peak flows that are used for flood frequency estimation. Using three different types of performance metrics, we calibrate two hydrological models at a daily step, the Variable Infiltration Capacity (VIC) model and the mesoscale Hydrologic Model (mHM), and evaluate their ability to simulate high-flow events for 492 basins throughout the contiguous United States. The metrics investigated are (1) NSE, (2) Kling–Gupta efficiency (KGE) and its variants, and (3) annual peak flow bias (APFB), where the latter is an application-specific metric that focuses on annual peak flows. As expected, the APFB metric produces the best annual peak flow estimates; however, performance on other high-flow-related metrics is poor. In contrast, the use of NSE results in annual peak flow estimates that are more than 20 % worse, primarily due to the tendency of NSE to underestimate observed flow variability. On the other hand, the use of KGE results in annual peak flow estimates that are better than from NSE, owing to improved flow time series metrics (mean and variance), with only a slight degradation in performance with respect to other related metrics, particularly when a non-standard weighting of the components of KGE is used. Stochastically generated ensemble simulations based on model residuals show the ability to improve the high-flow metrics, regardless of the deterministic performances.
However, we emphasize that improving the fidelity of streamflow dynamics from deterministically calibrated models is still important, as it may improve high-flow metrics (for the right reasons). Overall, this work highlights the need for a deeper understanding of performance metric behavior and design in relation to the desired goals of model calibration.</abstract>
<identifier type="citekey">Mizukami-2019-On</identifier>
<identifier type="doi">10.5194/hess-23-2601-2019</identifier>
<location>
<url>https://gwf-uwaterloo.github.io/gwf-publications/G19-119001</url>
</location>
<part>
<date>2019</date>
<detail type="volume"><number>23</number></detail>
<detail type="issue"><number>6</number></detail>
<extent unit="page">
<start>2601</start>
<end>2614</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T On the choice of calibration metrics for “high-flow” estimation using hydrologic models
%A Mizukami, Naoki
%A Rakovec, Oldřich
%A Newman, Andrew J.
%A Clark, Martyn P.
%A Wood, A. W.
%A Gupta, Hoshin
%A Kumar, Rohini
%J Hydrology and Earth System Sciences
%D 2019
%V 23
%N 6
%I Copernicus GmbH
%F Mizukami-2019-On
%X Calibration is an essential step for improving the accuracy of simulations generated using hydrologic models. A key modeling decision is selecting the performance metric to be optimized. It has been common to use squared error performance metrics, or normalized variants such as Nash–Sutcliffe efficiency (NSE), based on the idea that their squared-error nature will emphasize the estimates of high flows. However, we conclude that NSE-based model calibrations actually result in poor reproduction of high-flow events, such as the annual peak flows that are used for flood frequency estimation. Using three different types of performance metrics, we calibrate two hydrological models at a daily step, the Variable Infiltration Capacity (VIC) model and the mesoscale Hydrologic Model (mHM), and evaluate their ability to simulate high-flow events for 492 basins throughout the contiguous United States. The metrics investigated are (1) NSE, (2) Kling–Gupta efficiency (KGE) and its variants, and (3) annual peak flow bias (APFB), where the latter is an application-specific metric that focuses on annual peak flows. As expected, the APFB metric produces the best annual peak flow estimates; however, performance on other high-flow-related metrics is poor. In contrast, the use of NSE results in annual peak flow estimates that are more than 20 % worse, primarily due to the tendency of NSE to underestimate observed flow variability. On the other hand, the use of KGE results in annual peak flow estimates that are better than from NSE, owing to improved flow time series metrics (mean and variance), with only a slight degradation in performance with respect to other related metrics, particularly when a non-standard weighting of the components of KGE is used. Stochastically generated ensemble simulations based on model residuals show the ability to improve the high-flow metrics, regardless of the deterministic performances.
However, we emphasize that improving the fidelity of streamflow dynamics from deterministically calibrated models is still important, as it may improve high-flow metrics (for the right reasons). Overall, this work highlights the need for a deeper understanding of performance metric behavior and design in relation to the desired goals of model calibration.
%R 10.5194/hess-23-2601-2019
%U https://gwf-uwaterloo.github.io/gwf-publications/G19-119001
%U https://doi.org/10.5194/hess-23-2601-2019
%P 2601-2614
Markdown (Informal)
[On the choice of calibration metrics for “high-flow” estimation using hydrologic models](https://gwf-uwaterloo.github.io/gwf-publications/G19-119001) (Mizukami et al., GWF 2019)
ACL
- Naoki Mizukami, Oldřich Rakovec, Andrew J. Newman, Martyn P. Clark, A. W. Wood, Hoshin Gupta, and Rohini Kumar. 2019. On the choice of calibration metrics for “high-flow” estimation using hydrologic models. Hydrology and Earth System Sciences, 23(6):2601–2614.