Source code for datacleaner.snakecase

import re
from .datacleaner import *



class SnakeCase(DataCleaner):
    """Data cleaner that rewrites string values into ``snake_case``.

    String values are lower-cased, and a set of transliterations is
    registered that (judging by the helper names) inserts underscores at
    acronym, camel-case and digit boundaries, then collapses runs of
    underscores/whitespace into one underscore and trims underscores from
    both ends.
    """

    # Note carried over from the original source: the default column
    # normalizer converts null column headers to '<blank>', but this only
    # happens if the blank column headers do not occur at the end, as those
    # would be stripped away.
    # NOTE(review): presumably read by DataCleaner to disable numeric
    # coercion -- confirm against the base class.
    convert_numbers = False
    data_type = str

    def clean_string(self, val):
        """Clean *val* with the parent implementation, then lower-case it.

        Whatever normalisation the parent ``clean_string`` performs is
        applied first (the original comment suggests whitespace collapsing
        -- confirm in ``DataCleaner``).

        :param val: the value to clean.
        :returns: the lower-cased cleaned value when the parent returns a
            string; otherwise the parent's result unchanged.
        """
        val = super().clean_string(val)
        # isinstance (rather than an exact type check) so that str
        # subclasses are lower-cased as well.
        if isinstance(val, str):
            return val.lower()
        return val

    def tokenize(self, val):
        """Split *val* into its underscore-separated parts.

        :param val: a string (anything exposing ``split``).
        :returns: the list produced by ``val.split('_')``.
        """
        return val.split('_')

    def join(self, values):
        """Join *values* into a single lower-case, underscore-separated string.

        :param values: iterable of items; each is converted with ``str()``
            and lower-cased before joining.
        :returns: the joined string.
        """
        return '_'.join(str(v).lower() for v in values)

    def __init__(self, **kwargs):
        """Initialise the base cleaner and register the snake_case rules.

        :param kwargs: forwarded unchanged to ``DataCleaner.__init__``.
        """
        super().__init__(**kwargs)

        # (pattern, replacement) pairs handed to the base class in this
        # order. acronymre, camelre, interiornum, precedingnum and
        # underscorejoin come from the star-import of .datacleaner;
        # presumably each pattern finds a word/number boundary and
        # underscorejoin glues the matched pieces with '_' -- confirm there.
        self.add_transliterations([
            (acronymre, underscorejoin),
            (camelre, underscorejoin),
            (interiornum, underscorejoin),
            (precedingnum, underscorejoin),
            # collapse any run of underscores and/or whitespace into one '_'
            (r'[\_\s]+', '_'),
            # remove any preceding or succeeding underscores
            (r'^\_+', ''),
            (r'\_+$', ''),
        ])
Source code for datacleaner.snakecase

Data Cleaner

Navigation

Related Topics