## Lines with @ are roxygen2 tags

#' Standardize character vectors to a common format
#'
#' @param str_in Character vector
#' @param word_delim Single character string defining the word delimiter for
#'   the output character vector, either " " (space), ".", "-", or "_"
#' @return Character vector of the same length as the input vector, but with
#'   all elements updated to the new format
#' @details This function somewhat crudely attempts to force character vectors
#' to a common format. It does this by converting everything to uppercase,
#' dropping non-ASCII characters, trimming leading/trailing whitespace, and
#' replacing all common word delimiters (space, period, underscore, dash) with
#' the selected one.
#' Note that of the four delimiters, only underscores or periods will form
#' syntactically valid R variable names (override by reading in tables with
#' check.names = FALSE). However, underscores can cause problems with reading
#' VCFs in PLINK, as they will by default be interpreted as delimiters for
#' family and individual IDs
#' @examples
#' stand_str(c(" sample one ", "sample.two"), word_delim = "_")
stand_str <- function(str_in, word_delim = "-") {
  ## Check for appropriate word delimiter selection. The length check keeps a
  ## vector-valued word_delim from reaching `if` with a length > 1 condition.
  allowed_delims <- c(" ", "-", "_", ".")
  if (length(word_delim) != 1L || !word_delim %in% allowed_delims) {
    stop('Please select one of the following for the output word delimiter: " " [space], ".", "-", "_"')
  }

  ## Convert everything to uppercase
  str_out <- toupper(str_in)

  ## Remove non-ASCII characters (unconvertible bytes are replaced with "")
  str_out <- iconv(str_out, "latin1", "ASCII", sub = "")

  ## Remove leading/trailing whitespace. This runs after the non-ASCII strip
  ## so that whitespace left at the edges by a removed character does not
  ## turn into a stray leading/trailing delimiter below.
  str_out <- trimws(str_out, which = "both")

  ## Swap out whitespace, dash, period, underscore for selected delimiter
  gsub("\\s|-|\\.|_", word_delim, str_out)
}
# HTTPS / SSH
# You can clone a snippet to your computer for local editing.
# Learn more.