Rloose

Adding, subtracting, harmonic mean, and arithmetic mean of likelihoods in log-space (log-likelihoods)

The simple way to add, subtract, or average log-likelihoods is to exp() them, and then do normal math. However, this will fail for LnLs below about -745, because of precision problems (exp(-746) equals 0 in R).

Here I am storing some functions that do some transformations to do these operations. Some tests are also included, but I have not tried to test for every possible case.

Note 1: If you want the average of several likelihoods, calculate the harmonic mean, because likelihoods (at least in the case of discrete data) are proportions: https://en.wikipedia.org/wiki/Harmonic_mean

Note 2: This does not constitute an endorsement of calculating the harmonic mean of likelihoods. It's not entirely clear what one is doing with such an operation. If the mean just intended as a summary of several runs, that's OK, if described as such in a paper. For example, perhaps you have maximum likelihoods from a particular model on 5 sampled trees.

However, please note that the mean of several maximum likelihood LnLs is not equivalent to either a posterior distribution nor to a maximum likelihood value, so you can't run model tests etc. on it. It's just a summary of the central tendency of the likelihood values you have.

There is, additionally, a controversy over a harmonic mean estimator for other reasons, specifically in the case of summarizing model support in fully Bayesian analyses, but this is the harmonic mean of sampled marginal likelihoods, not the harmonic mean of some maximum likelihoods. The problem is that the estimator has infinite variance. See: The Harmonic Mean of the Likelihood: Worst Monte Carlo Method Ever. Stepping-stone sampling is an example of a better method.

#####################################################
# Sum of several likelihoods, starting from their log-likelihoods
#####################################################
sum_likes_using_LnLs <- function(LnLs, traditional=FALSE)
    {
    if (traditional == FALSE)
        {
        # Sum two likelihoods, starting from their LnLs

        # Number of input LnLs
        n = length(LnLs)

        # To add the non-log values, take
        # the minimum of these log values and subtract it from each value
        # (you are just dividing by the min)
        max_of_LnLs = max(LnLs)
        LnLs_minus_max = LnLs - max_of_LnLs

        # Take the exponent
        exp_LnLs_minus_max = exp(LnLs_minus_max)

        # Now these are in normal space, so add them
        sum_exp_LnLs_minus_max = sum(exp_LnLs_minus_max)
        sum_exp_LnLs_minus_max

        # Now, log summed value
        logsum_exp_LnLs_minus_max = log(sum_exp_LnLs_minus_max)
        logsum_exp_LnLs_minus_max

        # Now, multiply by the min times n (add the min in log space to each value)
        logsum_exp_LnLs_plus_max = logsum_exp_LnLs_minus_max + (max_of_LnLs)
        logsum_exp_LnLs_plus_max

        return(logsum_exp_LnLs_plus_max)
        } else {

        # The simple way
        likes = exp(LnLs)
        sum(likes)
        logsum_likes = log(sum(likes))
        logsum_likes

        return(logsum_likes)
        } # END if (traditional == FALSE)
    } # END sum_likes_using_LnLs <- function(LnLs, traditional=FALSE)

LnLs = c(-1, -2, -3)
sum_likes_using_LnLs(LnLs, traditional=FALSE)
sum_likes_using_LnLs(LnLs, traditional=TRUE)

LnLs = c(-0.01, -1, -2, -3)
sum_likes_using_LnLs(LnLs, traditional=FALSE)
sum_likes_using_LnLs(LnLs, traditional=TRUE)

LnLs = -1*c(-1, -2, -3)
sum_likes_using_LnLs(LnLs, traditional=FALSE)
sum_likes_using_LnLs(LnLs, traditional=TRUE)

LnLs = c(0, -1, -2, -3)
sum_likes_using_LnLs(LnLs, traditional=FALSE)
sum_likes_using_LnLs(LnLs, traditional=TRUE)

LnLs = -1*c(0, -1, -2, -3)
sum_likes_using_LnLs(LnLs, traditional=FALSE)
sum_likes_using_LnLs(LnLs, traditional=TRUE)

LnLs = c(3, 2, 1, -1, -2, -3)
sum_likes_using_LnLs(LnLs, traditional=FALSE)
sum_likes_using_LnLs(LnLs, traditional=TRUE)

LnLs = c(3, 2, 1, -1, -2, -3)
sum_likes_using_LnLs(LnLs, traditional=FALSE)
sum_likes_using_LnLs(LnLs, traditional=TRUE)

# Precision problems with traditional
LnLs = c(-745, -745, -745)
sum_likes_using_LnLs(LnLs, traditional=FALSE)
sum_likes_using_LnLs(LnLs, traditional=TRUE)

# Failure of traditional method because exp(-746)=0 and log(0)=-Inf
LnLs = c(-746, -746, -746)
sum_likes_using_LnLs(LnLs, traditional=FALSE)
sum_likes_using_LnLs(LnLs, traditional=TRUE)

#######################################################
# Difference between two log-likelihoods
#######################################################
likediff_from_LnLs <- function(LnLs, traditional=FALSE)
    {
    # Error check
    if (length(LnLs) != 2)
        {
        txt = paste0("STOP ERROR: likediff_from_LnLs() requires 2 LnLs as input. Instead, you have\nlength(LnLs)=", length(LnLs), ".")
        cat("\n\n")
        stop(txt)
        cat("\n\n")
        }

    if (traditional == FALSE)
        {
        # To add the non-log values, take
        # the minimum of these log values and subtract it from each value
        # (you are just dividing by the min)
        max_of_LnLs = max(LnLs)
        LnLs_minus_max = LnLs - max_of_LnLs

        # Take the exponent
        exp_LnLs_minus_max = exp(LnLs_minus_max)

        # Now these are in normal space, so subtract them
        abs_value_likelihood_difference = abs(exp_LnLs_minus_max[1] - exp_LnLs_minus_max[2])

        # Now, log the value
        log_of_abs_value_likelihood_difference = log(abs_value_likelihood_difference)
        log_of_abs_value_likelihood_difference

        # Multiple by the max (add in log space)
        log_of_abs_value_likelihood_difference = log_of_abs_value_likelihood_difference + max_of_LnLs

        # Get the actual difference in regular space
        # (may be zero)
        abs_value_likelihood_difference = exp(log_of_abs_value_likelihood_difference)

        } else {
        abs_value_likelihood_difference = abs(exp(LnLs[1]) - exp(LnLs[2]))
        log_of_abs_value_likelihood_difference = log(abs_value_likelihood_difference)
        } # END if (traditional == FALSE)

    # Store output
    output = NULL
    output$abs_value_likelihood_difference = abs_value_likelihood_difference
    output$log_of_abs_value_likelihood_difference = log_of_abs_value_likelihood_difference

    extract = '
    abs_value_likelihood_difference = output$abs_value_likelihood_difference
    log_of_abs_value_likelihood_difference = output$log_of_abs_value_likelihood_difference

    '    
    return(output)
    } # END likediff_from_LnLs <- function(LnLs, traditional=FALSE))

# These all work:
LnLs = c(0, -1)
likediff_from_LnLs(LnLs, traditional=FALSE)
likediff_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(-2, -1)
likediff_from_LnLs(LnLs, traditional=FALSE)
likediff_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(-1, -2)
likediff_from_LnLs(LnLs, traditional=FALSE)
likediff_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(2, 1)
likediff_from_LnLs(LnLs, traditional=FALSE)
likediff_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(1, 2)
likediff_from_LnLs(LnLs, traditional=FALSE)
likediff_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(1, -100)
likediff_from_LnLs(LnLs, traditional=FALSE)
likediff_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(-744, -745)
likediff_from_LnLs(LnLs, traditional=FALSE)
likediff_from_LnLs(LnLs, traditional=TRUE)

# Precision issues:
LnLs = c(-745, -746)
likediff_from_LnLs(LnLs, traditional=FALSE)
likediff_from_LnLs(LnLs, traditional=TRUE)

# Failure
LnLs = c(-746, -747)
# Works:
likediff_from_LnLs(LnLs, traditional=FALSE)
# Failure:
likediff_from_LnLs(LnLs, traditional=TRUE)

# Failure
LnLs = c(-1746, -1747)
# Works:
likediff_from_LnLs(LnLs, traditional=FALSE)
# Failure:
likediff_from_LnLs(LnLs, traditional=TRUE)

#####################################################
# Get the log(Harmonic mean of some log-likelihoods)
#####################################################

harmonic_mean_of_likes_from_LnLs <- function(LnLs, traditional=FALSE)
    {
    n = length(LnLs)

    if (traditional == FALSE)
        {
        # Take each one to the power of -1
        reciprocals_of_LnLs = -1 * LnLs

        # To add the log values, take
        # the minimum of these log values and subtract it from each value
        # (you are just dividing by the min)
        min_of_reciprocals_of_LnLs = min(reciprocals_of_LnLs)
        reciprocals_of_LnLs_minus_min = reciprocals_of_LnLs - min_of_reciprocals_of_LnLs

        # Take the exponent
        exp_reciprocals_of_LnLs_minus_min = exp(reciprocals_of_LnLs_minus_min)

        # Now these are in normal space, so add them
        sum_exp_reciprocals_of_LnLs_minus_min = sum(exp_reciprocals_of_LnLs_minus_min)

        # Now, log summed value
        logsum_exp_reciprocals_of_LnLs_minus_min = log(sum_exp_reciprocals_of_LnLs_minus_min)

        # Now, multiply by the min times n (add the min in log space to each value)
        logsum_exp_reciprocals_of_LnLs_plus_min = logsum_exp_reciprocals_of_LnLs_minus_min + (min_of_reciprocals_of_LnLs)

        # Now, multiply by -1 to do the final overall reciprocal for the harmonic mean
        recip = -1 * logsum_exp_reciprocals_of_LnLs_plus_min

        # And, multiply by n, the number of values being averaged 
        # (add in log space)
        log_harmonic_mean_likes_from_LnLs = recip + log(n)

        } else {
        #########################################
        # Double check, the simple way (works only for moderate-sized LnLs)
        #########################################

        # Convert to normal space
        likes = exp(LnLs)
        recip_likes = likes ^ -1
        denom = sum(recip_likes)
        harmonic_mean_likes = n * (1/denom)

        harmonic_mean_likes
        log(harmonic_mean_likes)

        log_harmonic_mean_likes_from_LnLs = log(harmonic_mean_likes)        
        } # END if (traditional == FALSE)

    return(log_harmonic_mean_likes_from_LnLs)
    } # END harmonic_mean_of_likes_from_LnLs <- function(LnLs, traditional=FALSE)

LnLs = c(0, -1, -2, -3)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(-1, -2, -3)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = -1*c(-1, -2, -3)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(3, 2, 1, -1, -2, -3)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(-640, -641, -642)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(-744, -745, -746)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(-746, -747, -748)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
harmonic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

arithmetic_mean_of_likes_from_LnLs <- function(LnLs, traditional=FALSE)
    {
    n = length(LnLs)

    if (traditional == FALSE)
        {

        # To add the log values, take
        # the minimum of these log values and subtract it from each value
        # (you are just dividing by the min)
        min_of_LnLs = min(LnLs)
        LnLs_minus_min = LnLs - min_of_LnLs

        # Take the exponent
        exp_LnLs_minus_min = exp(LnLs_minus_min)

        # Now these are in normal space, so mean them
        mean_of_exp_LnLs_minus_min = mean(exp_LnLs_minus_min)

        # Now, log summed value
        logmean_exp_of_LnLs_minus_min = log(mean_of_exp_LnLs_minus_min)

        # Now, multiply by the min times n (add the min in log space to each value)
        log_arithmetic_mean_likes_from_LnLs = logmean_exp_of_LnLs_minus_min + (min_of_LnLs)

        } else {
        #########################################
        # Double check, the simple way (works only for moderate-sized LnLs)
        #########################################

        # Convert to normal space
        likes = exp(LnLs)
        arithmetic_mean_likes = mean(likes)
        arithmetic_mean_likes
        log(arithmetic_mean_likes)

        log_arithmetic_mean_likes_from_LnLs = log(arithmetic_mean_likes)        
        } # END if (traditional == FALSE)

    return(log_arithmetic_mean_likes_from_LnLs)
    } # END arithmetic_mean_of_likes_from_LnLs <- function(LnLs, traditional=FALSE)

LnLs = c(0, -1, -2, -3)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(-1, -2, -3)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = -1*c(-1, -2, -3)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(3, 2, 1, -1, -2, -3)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(-640, -641, -642)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(-744, -745, -746)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)

LnLs = c(-746, -747, -748)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=FALSE)
arithmetic_mean_of_likes_from_LnLs(LnLs, traditional=TRUE)
Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-ShareAlike 3.0 License