Updated by Brian Ward 2017-11-12

File format_char_vec.r Deleted

-## Lines with @ are roxygen2 tags

-#' Change character vectors to a common format

-#'

-#' @param names_in Character vector

-#' @param word_delim Character defining the word delimiter for the output character

-#'   vector, either " " (space), ".", "-", or "_"

-#' @return character vector of same dimensions as input vector, but with all

-#'   elements updated to the new format

-#' @details This function somewhat crudely attempts to force matches in line

-#'   names. It does this by converting everything to uppercase, and replacing

-#'   all common word delimiters (space, period, underscore, dash) with the selected one.

-#'   Note that of the three, only underscores or periods will form syntactically-

-#'   valid R variable names (override by reading in tables with check.names = FALSE).

-#'   However, underscores can cause problems with reading

-#'   VCFs in PLINK, as they will by default be interpreted as delimiters for

-#'   family and individual IDs

-#' @suggests stringdist, plyr

-format_char_vec <- function(names_in, word_delim = "-") {

-  ## Check for appropriate word delimiter selection

-  if(!word_delim %in% c(" ", "-", "_", ".")) {

-    stop('Please select one of the following for the output word delimiter:

-         " " (space), ".", "-", "_"')

-  }

-  ## Convert everything to uppercase

-  names_out <- toupper(names_in)

-  ## Remove leading/trailing whitespace

-  names_out <- trimws(names_out, which = "both")

-  ## Substitute the word delimiters - doesn't seem to be any very good way to

-  ## do this in one fell swoop

-  names_out <- gsub(" ", word_delim, names_out)

-  names_out <- gsub("-", word_delim, names_out)

-  names_out <- gsub("\\.", word_delim, names_out)

-  names_out <- gsub("_", word_delim, names_out)

-  return(names_out)

-}

File standardize_char_vec.r Added

+## Lines with @ are roxygen2 tags

+#' Standardize character vectors to a common format

+#'

+#' @param names_in Character vector

+#' @param word_delim Character defining the word delimiter for the output character

+#'   vector, either " " (space), ".", "-", or "_"

+#' @return character vector of same dimensions as input vector, but with all

+#'   elements updated to the new format

+#' @details This function somewhat crudely attempts to force character vectors

+#'   to a common format. It does this by converting everything to uppercase, and replacing

+#'   all common word delimiters (space, period, underscore, dash) with the selected one.

+#'   Note that of the four, only underscores or periods will form syntactically-

+#'   valid R variable names (override by reading in tables with check.names = FALSE).

+#'   However, underscores can cause problems with reading

+#'   VCFs in PLINK, as they will by default be interpreted as delimiters for

+#'   family and individual IDs

+#' @suggests stringdist, plyr

+stand_char <- function(names_in, word_delim = "-") {

+  ## Check for appropriate word delimiter selection

+  if(!word_delim %in% c(" ", "-", "_", ".")) {

+    stop('Please select one of the following for the output word delimiter:

+         " " [space], ".", "-", "_"')

+  }

+  ## Convert everything to uppercase

+  names_out <- toupper(names_in)

+  ## Remove leading/trailing whitespace

+  names_out <- trimws(names_out, which = "both")

+  ## Swap out whitespace, dash, period, underscore for selected delimiter

+  names_out <- gsub("\\s|-|\\.|_", word_delim, names_out)

+  return(names_out)

+}

Updated by Brian Ward 2017-11-10

View revision

File format_char_vec.r Added

+## Lines with @ are roxygen2 tags

+#' Change character vectors to a common format

+#'

+#' @param names_in Character vector

+#' @param word_delim Character defining the word delimiter for the output character

+#'   vector, either " " (space), ".", "-", or "_"

+#' @return character vector of same dimensions as input vector, but with all

+#'   elements updated to the new format

+#' @details This function somewhat crudely attempts to force matches in line

+#'   names. It does this by converting everything to uppercase, and replacing

+#'   all common word delimiters (space, period, underscore, dash) with the selected one.

+#'   Note that of the three, only underscores or periods will form syntactically-

+#'   valid R variable names (override by reading in tables with check.names = FALSE).

+#'   However, underscores can cause problems with reading

+#'   VCFs in PLINK, as they will by default be interpreted as delimiters for

+#'   family and individual IDs

+#' @suggests stringdist, plyr

+format_char_vec <- function(names_in, word_delim = "-") {

+  ## Check for appropriate word delimiter selection

+  if(!word_delim %in% c(" ", "-", "_", ".")) {

+    stop('Please select one of the following for the output word delimiter:

+         " " (space), ".", "-", "_"')

+  }

+  ## Convert everything to uppercase

+  names_out <- toupper(names_in)

+  ## Remove leading/trailing whitespace

+  names_out <- trimws(names_out, which = "both")

+  ## Substitute the word delimiters - doesn't seem to be any very good way to

+  ## do this in one fell swoop

+  names_out <- gsub(" ", word_delim, names_out)

+  names_out <- gsub("-", word_delim, names_out)

+  names_out <- gsub("\\.", word_delim, names_out)

+  names_out <- gsub("_", word_delim, names_out)

+  return(names_out)

+}

File snippet.r Deleted

-## Lines with @ are roxygen2 tags

-#' Change character vectors to a common format

-#'

-#' @param names_in Character vector

-#' @param word_delim Character defining the word delimiter for the output character

-#'   vector, either " " (space), ".", "-", or "_"

-#' @return character vector of same dimensions as input vector, but with all

-#'   elements updated to the new format

-#' @details This function somewhat crudely attempts to force matches in line

-#'   names. It does this by converting everything to uppercase, and replacing

-#'   all common word delimiters (space, period, underscore, dash) with the selected one.

-#'   Note that of the three, only underscores or periods will form syntactically-

-#'   valid R variable names (override by reading in tables with check.names = FALSE).

-#'   However, underscores can cause problems with reading

-#'   VCFs in PLINK, as they will by default be interpreted as delimiters for

-#'   family and individual IDs

-#' @suggests stringdist, plyr

-format_char_vec <- function(names_in, word_delim = "-") {

-  ## Convert everything to uppercase

-  names_out <- toupper(names_in)

-  ## Select appropriate word delimiter for output

-  if(!word_delim %in% c(" ", "-", "_", ".")) {

-    stop('Please select one of the following for the output word delimiter:

-         " " (space), ".", "-", "_"')

-  }

-  ## Substitute the word delimiters - doesn't seem to be any very good way to

-  ## do this in one fell swoop

-  names_out <- gsub(" ", word_delim, names_out)

-  names_out <- gsub("-", word_delim, names_out)

-  names_out <- gsub("\\.", word_delim, names_out)

-  names_out <- gsub("_", word_delim, names_out)

-  return(names_out)

-}

Created by Brian Ward 2017-05-15

View revision

File snippet.r Added

+## Lines with @ are roxygen2 tags

+#' Change character vectors to a common format

+#'

+#' @param names_in Character vector

+#' @param word_delim Character defining the word delimiter for the output character

+#'   vector, either " " (space), ".", "-", or "_"

+#' @return character vector of same dimensions as input vector, but with all

+#'   elements updated to the new format

+#' @details This function somewhat crudely attempts to force matches in line

+#'   names. It does this by converting everything to uppercase, and replacing

+#'   all common word delimiters (space, period, underscore, dash) with the selected one.

+#'   Note that of the three, only underscores or periods will form syntactically-

+#'   valid R variable names (override by reading in tables with check.names = FALSE).

+#'   However, underscores can cause problems with reading

+#'   VCFs in PLINK, as they will by default be interpreted as delimiters for

+#'   family and individual IDs

+#' @suggests stringdist, plyr

+format_char_vec <- function(names_in, word_delim = "-") {

+  ## Convert everything to uppercase

+  names_out <- toupper(names_in)

+  ## Select appropriate word delimiter for output

+  if(!word_delim %in% c(" ", "-", "_", ".")) {

+    stop('Please select one of the following for the output word delimiter:

+         " " (space), ".", "-", "_"')

+  }

+  ## Substitute the word delimiters - doesn't seem to be any very good way to

+  ## do this in one fell swoop

+  names_out <- gsub(" ", word_delim, names_out)

+  names_out <- gsub("-", word_delim, names_out)

+  names_out <- gsub("\\.", word_delim, names_out)

+  names_out <- gsub("_", word_delim, names_out)

+  return(names_out)

+}

HTTPS

SSH

You can clone a snippet to your computer for local editing. Learn more.

Snippets

Brian Ward R_standardize_character_vectors

File format_char_vec.r Deleted

File standardize_char_vec.r Added

File format_char_vec.r Added

File snippet.r Deleted

File snippet.r Added