Snippets

Brian Ward R_standardize_character_vectors

Updated by Brian Ward

File standardize_char_vec.r Modified

  • Ignore whitespace
  • Hide word diff
   ## Replace double hyphens ("--", i.e. typed em dashes) with single hyphens
   str_out <- gsub("--", "-", str_out, fixed = TRUE)
   
+  ## Remove comment characters
+  str_out <- gsub("#", "", str_out, fixed = TRUE)
+  
+  ## Convert asterisks to dashes
+  str_out <- gsub("*", "-", str_out, fixed = TRUE)
+  
   ## Remove non-ASCII characters
   str_out <- iconv(str_out, "latin1", "ASCII", sub="")
   
Updated by Brian Ward

File standardize_char_vec.r Modified

  • Ignore whitespace
  • Hide word diff
   str_out <- trimws(str_out, which = "both")
   
   ## Replace double hyphens ("--", i.e. typed em dashes) with single hyphens
-  str_out <- gsub("--", "-", fixed = TRUE)
+  str_out <- gsub("--", "-", str_out, fixed = TRUE)
   
   ## Remove non-ASCII characters
   str_out <- iconv(str_out, "latin1", "ASCII", sub="")
Updated by Brian Ward

File standardize_char_vec.r Modified

  • Ignore whitespace
  • Hide word diff
   ## Remove leading/trailing whitespace
   str_out <- trimws(str_out, which = "both")
   
+  ## Replace double hyphens ("--", i.e. typed em dashes) with single hyphens
+  str_out <- gsub("--", "-", fixed = TRUE)
+  
   ## Remove non-ASCII characters
   str_out <- iconv(str_out, "latin1", "ASCII", sub="")
   
Updated by Brian Ward

File standardize_char_vec.r Modified

  • Ignore whitespace
  • Hide word diff
   ## Remove leading/trailing whitespace
   str_out <- trimws(str_out, which = "both")
   
+  ## Remove non-ASCII characters
+  str_out <- iconv(str_out, "latin1", "ASCII", sub="")
+  
   ## Swap out whitespace, dash, period, underscore for selected delimiter
   str_out <- gsub("\\s|-|\\.|_", word_delim, str_out)
   
Updated by Brian Ward

File standardize_char_vec.r Modified

  • Ignore whitespace
  • Hide word diff
 
 #' Standardize character vectors to a common format
 #' 
-#' @param names_in Character vector
+#' @param str_in Character vector
 #' @param word_delim Character defining the word delimiter for the output character
 #'   vector, either " " (space), ".", "-", or "_"
 #' @return character vector of same dimensions as input vector, but with all
 #'   VCFs in PLINK, as they will by default be interpreted as delimiters for
 #'   family and individual IDs
 #' @suggests stringdist, plyr
-stand_char <- function(names_in, word_delim = "-") {
+stand_str <- function(str_in, word_delim = "-") {
   
   ## Check for appropriate word delimiter selection
   if(!word_delim %in% c(" ", "-", "_", ".")) {
   }
   
   ## Convert everything to uppercase
-  names_out <- toupper(names_in)
+  str_out <- toupper(str_in)
   
   ## Remove leading/trailing whitespace
-  names_out <- trimws(names_out, which = "both")
+  str_out <- trimws(str_out, which = "both")
   
   ## Swap out whitespace, dash, period, underscore for selected delimiter
-  names_out <- gsub("\\s|-|\\.|_", word_delim, names_out)
+  str_out <- gsub("\\s|-|\\.|_", word_delim, str_out)
   
-  return(names_out)
+  return(str_out)
 }
  1. 1
  2. 2