Snippets

Brian Ward py3_standardize_character_vectors

Created by Brian Ward last modified
def stand_strings(str_list, delim):
    """Standardize capitalization and word delimiters for a list of strings.

    Each string is converted to uppercase, has non-ASCII characters removed,
    is stripped of leading/trailing whitespace, and has every whitespace
    character, period, hyphen and underscore replaced by ``delim``.

    Args:
        str_list (list): A list of input strings.
        delim (str): The desired output word delimiter - must be one of
            ' ' (i.e. space), '-', '_', or '.'

    Returns:
        A new list of the input strings, converted to uppercase with word
        delimiters converted to the user's chosen delimiter. The input list
        is not modified.

    Raises:
        SystemExit: If ``delim`` is not one of the four allowed delimiters.

    Examples:
        >>> ny = [' New York City ', 'New-York_City', 'new.york.city']
        >>> print(stand_strings(ny, "_"))
        ['NEW_YORK_CITY', 'NEW_YORK_CITY', 'NEW_YORK_CITY']

    """
    # Imported locally so the function works even when the enclosing module
    # has not imported these modules itself.
    import re
    import sys

    ## Check for appropriate word delimiter selection
    if delim not in (' ', '-', '_', '.'):
        sys.exit('''Please select one of the following for the output word delimiter:
            " " (space), "-", "_", "." ''')

    ## Convert to uppercase
    result = [s.upper() for s in str_list]

    ## Remove non-ASCII characters (code points 128 and above)
    result = [''.join(ch for ch in s if ord(ch) < 128) for s in result]

    ## Strip leading/trailing whitespace. This is done after the non-ASCII
    ## removal so that whitespace exposed by a dropped leading/trailing
    ## character does not end up as a stray delimiter.
    result = [s.strip() for s in result]

    ## Replace double hyphens (typewriter-style em dashes) with one hyphen
    result = [s.replace('--', '-') for s in result]

    ## Swap out whitespace, dash, period, underscore for selected delimiter
    return [re.sub(r'[\s.\-_]', delim, s) for s in result]

Comments (0)