R/nomogrammer.r from sbalci/ClinicoPathJamoviModule

R/nomogrammer.r
Summary

Maintainability

Test Coverage

Issues
#' @import ggplot2
#' @import scales
#' @importFrom stats line


## Create simple Fagan nomograms as ggplot objects
##   Based on Perl web-implementation (https://araw.mede.uic.edu/cgi-bin/testcalc.pl)
##   Authors: AM. Chekroud* & A. Schwartz (* adam dot chekroud at yale . edu)
##   December 2016

nomogrammer <- function(Prevalence,
                        Sens = NULL,
                        Spec = NULL,
                        Plr = NULL,
                        Nlr = NULL,
                        Detail = FALSE,
                        NullLine = FALSE,
                        LabelSize = (14/5),
                        Verbose = FALSE){

## Function inputs:
    # Prevalence (prior probability) as a number between 0 and 1
    # Either
        # Sens & Spec
        # model sensitivity and specificity as a number between 0 and 1
    # Or
        # Likelihood ratios
        # Positive and Negative LRs (numeric)

## Function options:
    # Detail: If true, will overlay key statistics onto the plot
    # NullLine: If true, will add a line from prior prob through LR = 1
    # LabelSize: Tweak this number to change the label sizes
    # Verbose: Print out relevant metrics in the console

## Function returns:
    # ggplot object



######################################
########## Libraries & Functions #####
######################################

## Libraries
# require(ggplot2)
# require(scales)






## Helper functions
##   (defined inside nomogrammer, so remain local only & wont clutter user env)
odds         <- function(p){
    # Function converts probability into odds
    o <- p/(1-p)
    return(o)
}

logodds      <- function(p){
    # Function returns logodds for a probability
    lo <- log10(p/(1-p))
    return(lo)
}

logodds_to_p <- function(lo){
    # Function goes from logodds back to a probability
    o <- 10^lo
    p <- o/(1+o)
    return(p)
}

p2percent <- function(p){
    # Function turns numeric probability into string percentage
    # e.g. 0.6346111 -> 63.5%
    scales::percent(signif(p, digits = 3))}


######################################
########## Calculations     ##########
######################################

## Checking inputs

## Prevalence
# needs to exist
if(missing(Prevalence)){
    stop("Prevalence is missing")
}
# needs to be numeric
if(!is.numeric(Prevalence)){stop("Prevalence should be numeric")}
# needs to be a prob not a percent
if((Prevalence > 1) | (Prevalence <= 0)){stop("Prevalence should be a probability (did you give a %?)")}

# Did user give sens & spec?
if(missing(Sens) | missing(Spec)){
    sensspec <- FALSE
} else{ sensspec <- TRUE}
# if yes, make sure they are numbers
if(sensspec == TRUE){
    if(!is.numeric(Sens)){stop("Sensitivity should be numeric")}
    if(!is.numeric(Spec)){stop("Specificity should be numeric")}
    # numbers that are probabilities not percentages
    if((Sens > 1) | (Sens <= 0)){stop("Sensitivity should be a probability (did you give a %?)")}
    if((Spec > 1) | (Spec <= 0)){stop("Specificity should be a probability (did you give a %?)")}
}


# Did user give PLR & NLR?
if(missing(Plr) | missing(Nlr)){
    plrnlr <- FALSE
} else{plrnlr <- TRUE}
# if yes, make sure they are numbers
if(plrnlr == TRUE){
    if(!is.numeric(Plr)){stop("PLR should be numeric")}
    if(!is.numeric(Nlr)){stop("NLR should be numeric")}
    # numbers that vaguely make sense
    if(Plr < 1){stop("PLR shouldn't be less than 1")}
    if(Nlr < 0){stop("NLR shouldn't be below zero")}
    if(Nlr > 1){stop("NLR shouldn't be more than 1")}
}

# Did they give a valid sensspec and plrnlr? If yes, ignore the LRs and tell them
if((sensspec == TRUE) && (plrnlr == TRUE) ){
    warning("You provided sens/spec as well as likelihood ratios-- I ignored the LRs!")
}


## If sens/spec provided, we calculate posterior probabilities & odds using sens & spec
##  otherwise, if plr and nlr provided, we calculate posteriors using them
##  if neither exist, then return an error
if(sensspec == TRUE){
    prior_prob  <- Prevalence
    prior_odds  <- odds(prior_prob)
    sensitivity <- Sens
    specificity <- Spec
    PLR <- sensitivity/(1-specificity)
    NLR <- (1-sensitivity)/specificity
    post_odds_pos  <- prior_odds * PLR
    post_odds_neg  <- prior_odds * NLR
    post_prob_pos  <- post_odds_pos/(1+post_odds_pos)
    post_prob_neg  <- post_odds_neg/(1+post_odds_neg)
} else if(plrnlr == TRUE){
    prior_prob  <- Prevalence
    prior_odds  <- odds(prior_prob)
    PLR <- Plr
    NLR <- Nlr
    sensitivity <- (PLR*(1-NLR))/(PLR-NLR)    ## TODO: check Adam's math!
    specificity <- (1-PLR)/(NLR-PLR)          ## TODO: check Adam's math!
    post_odds_pos  <- prior_odds * PLR
    post_odds_neg  <- prior_odds * NLR
    post_prob_pos  <- post_odds_pos/(1+post_odds_pos)
    post_prob_neg  <- post_odds_neg/(1+post_odds_neg)
} else{
        stop("Couldn't find sens & spec, or positive & negative likelihood ratios")
    }



######################################
########## Plotting (prep)  ##########
######################################


## Set common theme preferences up front
theme_set(theme_bw() +
              theme(axis.text.x = element_blank(),
                    axis.ticks.x = element_blank(),
                    axis.title.x = element_blank(),
                    axis.title.y = element_text(angle = 0),
                    axis.title.y.right = element_text(angle = 0),
                    axis.line = element_blank(),
                    panel.grid = element_blank(),
                    legend.position = "none"
                    )
          )

## Setting up the points of interest along the y-axes

# Select probabilities of interest (nb as percentages)
ticks_prob    <- c(0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 30,
                   40, 50, 60, 70, 80, 90, 95, 99)
# Convert % to odds
ticks_odds    <- odds(ticks_prob/100)
# Convert % to logodds
ticks_logodds <- logodds(ticks_prob/100)

# Select the likelihood ratios of interest (for the middle y-axis)
ticks_lrs     <- sort(c(10^(-3:3), 2*(10^(-3:2)), 5*(10^(-3:2))))
# Log10 them since plot is in logodds space
ticks_log_lrs <- log10(ticks_lrs)




## Fixing particular x-coordinates
left     <- 0
right    <- 1
middle   <- 0.5
midright <- 0.75

## Lay out the four key plot points
##  (the start and finish of the positive and negative lines)

# Initially these are expressed as probabilities
df <- data.frame(x=c(left, right, left, right),
                 y=c(prior_prob, post_prob_pos, prior_prob, post_prob_neg),
                 line = c("pos", "pos", "neg", "neg"))

adj_min      <- range(ticks_logodds)[1]
adj_max      <- range(ticks_logodds)[2]
adj_diff     <- adj_max - adj_min
scale_factor <- abs(adj_min) - adj_diff/2
#df$lo_y <- ifelse(df$x==left,(10/adj_diff)*logodds(1-df$y)-1,logodds(df$y))

# Convert probabilities to logodds for plotting
df$lo_y  <- ifelse(df$x==left,logodds(1-df$y)-scale_factor,logodds(df$y))
# zero         <- data.frame(x = c(left,right),
#                            y = c(0,0),
#                            line = c('pos','pos'),
#                            lo_y = c(-scale_factor,0))





rescale   <- range(ticks_logodds) + abs(adj_min) - adj_diff/2
rescale_x_breaks  <- ticks_logodds + abs(adj_min) - adj_diff/2



######################################
########## Plot             ##########
######################################


p <- ggplot(df) +
        geom_line(aes(x = x, y = lo_y, color = line), size = 1) +
        geom_vline(xintercept = middle) +
        annotate(geom = "text",
                 x = rep(middle+.075, length(ticks_log_lrs)),
                 y = (ticks_log_lrs-scale_factor)/2,
                 label = ticks_lrs,
                 size = rel(LabelSize)) +
        annotate(geom="point",
                 x = rep(middle, length(ticks_log_lrs)),
                 y = (ticks_log_lrs-scale_factor)/2,
                 size = 1) +
        scale_x_continuous(expand = c(0,0)) +
        scale_y_continuous(expand = c(0,0),
                           limits = rescale,
                           breaks = -rescale_x_breaks,
                           labels = ticks_prob,
                           name = "prior \n prob.",
                           sec.axis = sec_axis(trans = ~.,
                                               name = "posterior \n prob.",
                                               labels = ticks_prob,
                                               breaks = ticks_logodds))

## Optional overlay text: prevalence, PLR/NLR, and posterior probabilities
detailedAnnotation <- paste(
    paste0("prevalence = ", p2percent(prior_prob)),
    paste("PLR =", signif(PLR, 3),", NLR =", signif(NLR, 3)),
    paste("post. pos =", p2percent(post_prob_pos),
          ", neg =", p2percent(post_prob_neg)),
    sep = "\n")


## Optional amendments to the plot

## Do we add the null line i.e. LR = 1, illustrating an uninformative model
if(NullLine == TRUE){
    ## If yes, first calculate the start and end points
    uninformative <- data.frame(
        x = c(left,right),
        lo_y = c( (logodds(1-prior_prob) - scale_factor) , logodds(prior_prob))
    )

    p <- p + geom_line(aes(x = x, y = lo_y), data = uninformative,
                       color = "gray",
                       lty = 2,
                       inherit.aes = FALSE)
}


## Do we add the detailed stats to the top right?
if(Detail == TRUE){
    p <- p + annotate(geom = "text",
                      x = midright,
                      y = 2,
                      label = detailedAnnotation,
                      size = rel(LabelSize))
    }

if(Verbose == TRUE){
    writeLines(
        text = c(
            paste0("prevalence = ", p2percent(prior_prob)),
            paste("PLR =", signif(PLR, 3)),
            paste("NLR =", signif(NLR, 3)),
            paste("posterior probability (positive) =", p2percent(post_prob_pos)),
            paste("posterior probability (negative) =", p2percent(post_prob_neg)),
            paste("sensitivity =", p2percent(sensitivity)),
            paste("specificity =", p2percent(specificity))
            # sep = "\n"
        )
    )
    }


return(p)

}






### TODO:
# Allow ppl to input the confusion matrix
# input_TP
# input_FP
# input_FN
# input_TN
#                            Obs
#                   present       absent
#            +----------------+----------------+
#      pos   | True Positive  | False Positive |
# Pred       +----------------+----------------+
#      neg   | False Negative | True Negative  |
#            +----------------+----------------+