Skip to content

Handlers

MaskDispatch

Class responsible for managing anonymization handlers.

Source code in src/anonymizer_data/handlers/dispatch.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
class MaskDispatch:
    """
    Registry that maps mask type names to anonymization handlers.

    The handlers are kept in a dictionary stored on the class itself, so every
    registration made through ``register`` or ``add_handler`` lands in that one
    dictionary, which ``mask`` reads from.
    """

    # mask type name -> handler callable
    _handlers: dict[str, Callable[..., Any]] = {}

    @classmethod
    def register(cls, *type_masks: str) -> Callable:
        """
        Build a decorator that registers a callable under every given mask type.

        Parameters:
            *type_masks (str): The mask type names the handler should answer to.

        Returns:
            Callable: A decorator that stores the handler and returns it unchanged.
        """

        def _register_handler(handler: Callable) -> Callable:
            for mask_name in type_masks:
                cls.add_handler(mask_name, handler)
            return handler

        return _register_handler

    @classmethod
    def add_handler(cls, type_mask: str, handler: Callable) -> None:
        """
        Store ``handler`` under ``type_mask``, replacing any previous entry.

        Parameters:
            type_mask (str): The mask type name used as the registry key.
            handler (Callable): The callable to invoke for that mask type.
        """
        cls._handlers.update({type_mask: handler})

    def mask(self, type_mask: str, data: Any, **kwargs: Any) -> Any:
        """
        Run the handler registered for ``type_mask`` on ``data``.

        Parameters:
            type_mask (str): The mask type name to look up.
            data (Any): The value handed to the handler as its first argument.
            **kwargs (Any): Extra keyword arguments forwarded to the handler.

        Returns:
            Any: The handler's result, or ``data`` itself when no handler is
            registered for ``type_mask``.
        """
        try:
            handler = self._handlers[type_mask]
        except KeyError:
            # Unknown mask type: hand the data back untouched.
            return data
        return handler(data, **kwargs)

add_handler(type_mask, handler) classmethod

Adds a handler for a specific mask type.

Source code in src/anonymizer_data/handlers/dispatch.py
20
21
22
23
@classmethod
def add_handler(cls, type_mask: str, handler: Callable) -> None:
    """
    Store ``handler`` under ``type_mask``, replacing any previous entry.

    Parameters:
        type_mask (str): The mask type name used as the registry key.
        handler (Callable): The callable to invoke for that mask type.
    """
    cls._handlers.update({type_mask: handler})

mask(type_mask, data, **kwargs)

Applies the appropriate mask to the given data if the type exists.

Source code in src/anonymizer_data/handlers/dispatch.py
25
26
27
28
29
def mask(self, type_mask: str, data: Any, **kwargs: Any) -> Any:
    """
    Run the handler registered for ``type_mask`` on ``data``.

    Parameters:
        type_mask (str): The mask type name to look up.
        data (Any): The value handed to the handler as its first argument.
        **kwargs (Any): Extra keyword arguments forwarded to the handler.

    Returns:
        Any: The handler's result, or ``data`` itself when no handler is
        registered for ``type_mask``.
    """
    try:
        handler = self._handlers[type_mask]
    except KeyError:
        # Unknown mask type: hand the data back untouched.
        return data
    return handler(data, **kwargs)

register(*type_masks) classmethod

Decorator to register a handler for specific mask types.

Source code in src/anonymizer_data/handlers/dispatch.py
 9
10
11
12
13
14
15
16
17
18
@classmethod
def register(cls, *type_masks: str) -> Callable:
    """
    Build a decorator that registers a callable under every given mask type.

    Parameters:
        *type_masks (str): The mask type names the handler should answer to.

    Returns:
        Callable: A decorator that stores the handler and returns it unchanged.
    """

    def _register_handler(handler: Callable) -> Callable:
        for mask_name in type_masks:
            cls.add_handler(mask_name, handler)
        return handler

    return _register_handler

anonymize_all_string(string, **kwargs)

Anonymize all characters of a string.

Source code in src/anonymizer_data/handlers/functions.py
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
@MaskDispatch.register(
    "username",
    "first_name",
    "name",
    "nome",
    "endereco",
    "endereço",
    "address",
    "bairro",
    "neighborhood",
    "district",
    "suburb",
    "quarter",
    "sexo",
    "sex",
    "gender",
    "raça",
    "raca",
    "race",
    "cor",
    "color",
    "senha",
    "password",
    "tipo_sanguineo",
    "blood_type",
)
def anonymize_all_string(string: str, **kwargs: Any) -> str:
    """
    Mask every character of the given value.

    The value is converted with ``str()`` and passed to ``anonymize_string`` with
    ``size_anonymization=1.0``; all other keyword arguments are forwarded unchanged.

    Parameters:
        string (str): The value to anonymize.

    Returns:
        str: Whatever ``anonymize_string`` returns for the fully masked text.
    """
    text = str(string)
    return anonymize_string(text, size_anonymization=1.0, **kwargs)

anonymize_cep(cep, **kwargs)

Anonymize a Brazilian CEP (Código de Endereçamento Postal) by masking parts of it.

This function takes a CEP number as input, removes any non-numeric characters, and returns a masked version of the CEP. If the input CEP is formatted with a hyphen, it will mask the first five digits while revealing the last three digits. If the CEP is provided without formatting, it will mask all but the last three digits.

Parameters:

Name Type Description Default
cep str

The original CEP number to be anonymized, which may include non-numeric characters.

required

Returns:

Name Type Description
str str

The masked version of the CEP number.

Source code in src/anonymizer_data/handlers/functions.py
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
@MaskDispatch.register("cep")
def anonymize_cep(cep: str, **kwargs: Any) -> str:
    """
    Mask the first five digits of a Brazilian CEP (Código de Endereçamento Postal).

    The input must be a string of five digits, an optional hyphen, and three digits;
    anything else is delegated to ``_handle_invalid_doc``. A hyphenated CEP keeps its
    hyphen and its last three digits. A CEP without a hyphen has its first five
    digits masked through ``mask_string_part``.

    Parameters:
        cep (str): The CEP to anonymize, with or without the hyphen.

    Returns:
        str: The masked CEP, or the result of ``_handle_invalid_doc`` for invalid input.
    """
    looks_valid = isinstance(cep, str) and re.match(r"^\d{5}-?\d{3}$", cep)
    if not looks_valid:
        return _handle_invalid_doc(str(cep), "CEP", **kwargs)

    mask_char = kwargs.get("mask_char", Config.default_mask_char)

    if "-" not in cep:
        digits = re.sub(r"[^0-9]", "", cep)
        return mask_string_part(digits, start=0, end=5, **kwargs)

    # Everything after the hyphen (index 5) is kept as-is.
    return f"{mask_char*5}-{cep[6:]}"

anonymize_cnpj(cnpj, **kwargs)

Anonymize a Brazilian CNPJ (Cadastro Nacional da Pessoa Jurídica) number by masking parts of it.

This function validates the CNPJ and returns a masked version of it. If the input contains dots, a slash, and a dash (e.g. 12.345.678/0001-95), the result keeps that layout and reveals only the three digits immediately before the slash; every other digit is masked. Otherwise the non-numeric characters are removed and the first nine digits are masked, leaving the remaining digits (the last five of a 14-digit CNPJ) visible.

Parameters:

Name Type Description Default
cnpj str

The original CNPJ number to be anonymized, which may include non-numeric characters.

required

Returns:

Name Type Description
str str

The masked version of the CNPJ number.

Source code in src/anonymizer_data/handlers/functions.py
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
@MaskDispatch.register("cnpj")
def anonymize_cnpj(cnpj: str, **kwargs: Any) -> str:
    """
    Mask a Brazilian CNPJ (Cadastro Nacional da Pessoa Jurídica) number.

    Input that is not a string, or that the ``CNPJ`` validator rejects, is delegated
    to ``_handle_invalid_doc``. When the input contains a dot, a dash, and a slash,
    the result is laid out as ``NN.NNN.NNN/NNNN-NN`` with every digit masked except
    the three immediately before the slash. Otherwise the non-numeric characters are
    stripped and the first nine digits are masked through ``mask_string_part``.

    Parameters:
        cnpj (str): The CNPJ to anonymize, which may include non-numeric characters.

    Returns:
        str: The masked CNPJ, or the result of ``_handle_invalid_doc`` for invalid input.
    """
    validator = CNPJ()
    is_valid = isinstance(cnpj, str) and validator.validate(cnpj)
    if not is_valid:
        return _handle_invalid_doc(str(cnpj), "CNPJ", **kwargs)

    mask_char = kwargs.get("mask_char", Config.default_mask_char)
    digits = re.sub(r"[^0-9]", "", cnpj)

    is_formatted = all(separator in cnpj for separator in (".", "-", "/"))
    if not is_formatted:
        return mask_string_part(digits, start=0, end=9, **kwargs)

    return f"{mask_char*2}.{mask_char*3}.{digits[5:8]}/{mask_char*4}-{mask_char*2}"

anonymize_cpf(cpf, **kwargs)

Anonymize a Brazilian CPF (Cadastro de Pessoas Físicas) number by masking parts of it.

This function validates the CPF and returns a masked version of it. If the input contains dots and a dash (e.g. 123.456.789-09), the result keeps that layout and reveals only the middle three-digit group; every other digit is masked. Otherwise the non-numeric characters are removed and the first nine digits are masked, leaving the remaining digits (the last two of an 11-digit CPF) visible.

Parameters:

Name Type Description Default
cpf str

The original CPF number to be anonymized, which may include non-numeric characters.

required

Returns:

Name Type Description
str str

The masked version of the CPF number.

Source code in src/anonymizer_data/handlers/functions.py
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
@MaskDispatch.register("cpf", "cpfs")
def anonymize_cpf(cpf: str, **kwargs: Any) -> str:
    """
    Mask a Brazilian CPF (Cadastro de Pessoas Físicas) number.

    Input that is not a string, or that the ``CPF`` validator rejects, is delegated
    to ``_handle_invalid_doc``. When the input contains both a dot and a dash, the
    result is laid out as ``NNN.NNN.NNN-NN`` with every digit masked except the
    middle three-digit group. Otherwise the non-numeric characters are stripped and
    the first nine digits are masked through ``mask_string_part``.

    Parameters:
        cpf (str): The CPF to anonymize, which may include non-numeric characters.

    Returns:
        str: The masked CPF, or the result of ``_handle_invalid_doc`` for invalid input.
    """
    validator = CPF()
    is_valid = isinstance(cpf, str) and validator.validate(cpf)
    if not is_valid:
        return _handle_invalid_doc(str(cpf), "CPF", **kwargs)

    mask_char = kwargs.get("mask_char", Config.default_mask_char)
    digits = re.sub(r"[^0-9]", "", cpf)

    is_formatted = all(separator in cpf for separator in (".", "-"))
    if not is_formatted:
        return mask_string_part(digits, start=0, end=9, **kwargs)

    return f"{mask_char*3}.{digits[3:6]}.{mask_char*3}-{mask_char*2}"

anonymize_email(email, **kwargs)

Anonymize an email address by masking the username part.

This function takes an email address as input and replaces the username part (the part before the '@') with a masked version, while keeping the domain part intact. The level of anonymization for the username can be adjusted using additional parameters.

Parameters:

Name Type Description Default
email str

The original email address to be anonymized.

required

Returns:

Name Type Description
str str

The masked version of the email address.

Source code in src/anonymizer_data/handlers/functions.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
@MaskDispatch.register("email", "mail")
def anonymize_email(email: str, **kwargs: Any) -> str:
    """
    Mask the username part of an email address and keep the domain.

    The address is split at its first ``@``. The part before it is masked by
    ``anonymize_string`` with ``size_anonymization=0.9``; the part after it is kept
    verbatim. Input that is not a string, has no ``@``, or has an empty username or
    domain is delegated to ``_handle_invalid_doc``.

    Parameters:
        email (str): The email address to anonymize.

    Returns:
        str: The masked address, or the result of ``_handle_invalid_doc`` for invalid input.
    """
    if isinstance(email, str):
        local_part, separator, host = email.partition("@")
        if separator and local_part and host:
            hidden_local = anonymize_string(
                local_part, size_anonymization=0.9, **kwargs
            )
            return f"{hidden_local}@{host}"

    return _handle_invalid_doc(str(email), "Email", **kwargs)

anonymize_numeric_digits(string, **kwargs)

Anonymize all numeric digits in a string by replacing them with asterisks.

This function scans the input string and replaces every numeric digit (0-9) with an asterisk (*), effectively anonymizing any sensitive numerical information.

Parameters:

Name Type Description Default
string str

The original string containing numeric digits to be anonymized.

required

Returns:

Name Type Description
str str

The modified string with all numeric digits replaced by asterisks.

Source code in src/anonymizer_data/handlers/functions.py
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
@MaskDispatch.register("numero", "number")
def anonymize_numeric_digits(string: str, **kwargs: Any) -> str:
    """
    Replace every digit in a value with the mask character.

    The value is converted with ``str()`` and each character matched by ``\\d`` is
    replaced by the mask character; all other characters are left in place.

    Parameters:
        string (str): The value whose digits should be anonymized.
        **kwargs (Any): ``mask_char`` overrides ``Config.default_mask_char``.

    Returns:
        str: The text with every digit replaced by the mask character.
    """
    mask_char = kwargs.get("mask_char", Config.default_mask_char)
    # A callable replacement is inserted literally. A plain string would be parsed
    # as a regex template, so masks such as "\\" or r"\1" would raise re.error.
    return re.sub(r"\d", lambda _match: mask_char, str(string))

anonymize_phone_number(phone, **kwargs)

Anonymize a phone number by masking parts of it while preserving its format.

This function takes a phone number as input, removes any non-numeric characters, and returns a masked version of the phone number. The format is preserved, with specific parts masked according to the rules defined.

Parameters:

Name Type Description Default
phone str

The original phone number to be anonymized, which may include non-numeric characters.

required

Returns:

Name Type Description
str str

The masked version of the phone number.

Source code in src/anonymizer_data/handlers/functions.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
@MaskDispatch.register(
    "phone",
    "smartphone",
    "cell_phone_number",
    "cell_phone",
    "celular",
    "telefone",
    "telefone_fixo",
)
def anonymize_phone_number(phone: str, **kwargs: Any) -> str:
    """
    Mask every digit of a phone number except the last three, keeping its layout.

    Non-digit characters (spaces, parentheses, dashes, ...) stay where they are.
    Input that is not a string, or that contains fewer than three digits, is
    delegated to ``_handle_invalid_doc``.

    Parameters:
        phone (str): The phone number to anonymize, which may include non-numeric characters.

    Returns:
        str: The masked phone number, or the result of ``_handle_invalid_doc`` for invalid input.
    """
    if not isinstance(phone, str):
        return _handle_invalid_doc(str(phone), "Phone", **kwargs)

    digits = re.findall(r"\d", phone)
    hidden_count = len(digits) - 3
    if hidden_count < 0:
        return _handle_invalid_doc(phone, "Phone", **kwargs)

    mask_char = kwargs.get("mask_char", Config.default_mask_char)

    # One replacement per digit, in order: masks first, then the last three digits.
    replacements = iter([mask_char] * hidden_count + digits[-3:])
    return re.sub(r"\d", lambda _match: next(replacements), phone)

anonymize_pis(pis, **kwargs)

Anonymize a Brazilian PIS (Programa de Integração Social) number by masking parts of it.

This function validates the PIS and returns a masked version of it. If the input contains a hyphen (e.g. 123.45678.90-1), the result uses the NNN.NNNNN.NN-N layout and reveals only the sixth to eighth digits; every other digit is masked. Otherwise the non-numeric characters are removed and the first eight digits are masked, leaving the remaining digits (the last three of an 11-digit PIS) visible.

Parameters:

Name Type Description Default
pis str

The original PIS number to be anonymized, which may include non-numeric characters.

required

Returns:

Name Type Description
str str

The masked version of the PIS number.

Source code in src/anonymizer_data/handlers/functions.py
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
@MaskDispatch.register("pis")
def anonymize_pis(pis: str, **kwargs: Any) -> str:
    """
    Mask a Brazilian PIS (Programa de Integração Social) number.

    Input that is not a string, or that the ``PIS`` validator rejects, is delegated
    to ``_handle_invalid_doc``. When the input contains a hyphen, the result is laid
    out as ``NNN.NNNNN.NN-N`` with every digit masked except the sixth to eighth.
    Otherwise the non-numeric characters are stripped and the first eight digits are
    masked through ``mask_string_part``.

    Parameters:
        pis (str): The PIS to anonymize, which may include non-numeric characters.

    Returns:
        str: The masked PIS, or the result of ``_handle_invalid_doc`` for invalid input.
    """
    validator = PIS()
    is_valid = isinstance(pis, str) and validator.validate(pis)
    if not is_valid:
        return _handle_invalid_doc(str(pis), "PIS", **kwargs)

    mask_char = kwargs.get("mask_char", Config.default_mask_char)
    digits = re.sub(r"[^0-9]", "", pis)

    if "-" not in pis:
        return mask_string_part(digits, start=0, end=8, **kwargs)

    return f"{mask_char*3}.{mask_char*2}{digits[5:8]}.{mask_char*2}-{mask_char}"

anonymize_rg(rg, **kwargs)

Anonymize a Brazilian RG (Registro Geral) number by masking parts of it.

This function accepts an RG written either as nine digits or in the dotted form NN.NNN.NNN-N and returns a masked version of it. For the dotted form, the result keeps that layout and reveals only the first three-digit group (the third to fifth digits); every other digit is masked. For the nine-digit form, the first six digits are masked and the last three stay visible.

Parameters:

Name Type Description Default
rg str

The original RG number to be anonymized, which may include non-numeric characters.

required

Returns:

Name Type Description
str str

The masked version of the RG number.

Source code in src/anonymizer_data/handlers/functions.py
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
@MaskDispatch.register("rg")
def anonymize_rg(rg: str, **kwargs: Any) -> str:
    """
    Mask a Brazilian RG (Registro Geral) number.

    The input must be a string of nine digits or the dotted form ``NN.NNN.NNN-N``;
    anything else is delegated to ``_handle_invalid_doc``. The dotted form keeps its
    layout and reveals only the first three-digit group (the third to fifth digits).
    The nine-digit form has its first six digits masked through ``mask_string_part``.

    Parameters:
        rg (str): The RG to anonymize, with or without the dots and dash.
        **kwargs (Any): ``mask_char`` overrides ``Config.default_mask_char``.

    Returns:
        str: The masked RG, or the result of ``_handle_invalid_doc`` for invalid input.
    """
    if not isinstance(rg, str) or not re.match(
        r"^(?:\d{9}|\d{2}\.\d{3}\.\d{3}-\d)$", rg
    ):
        return _handle_invalid_doc(str(rg), "RG", **kwargs)

    mask_char = kwargs.get("mask_char", Config.default_mask_char)
    pattern = re.sub(r"[^0-9]", "", rg)

    if "." in rg and "-" in rg:
        # The accepted dotted form has exactly one check digit after the dash, so
        # one mask character goes there; two would lengthen the output.
        return f"{mask_char*2}.{pattern[2:5]}.{mask_char*3}-{mask_char}"
    return mask_string_part(pattern, start=0, end=6, **kwargs)

anonymize_string(value, size_anonymization, **kwargs)

Anonymize a string by masking a specified portion of it.

This function takes a string and replaces a portion of its characters with asterisks (*). The extent of the masking is determined by the size_anonymization parameter, which specifies the fraction of the string to be masked.

Parameters:

Name Type Description Default
value str

The original string to be anonymized.

required
size_anonymization float

A float value between 0 and 1 indicating the proportion of the string to mask. For example, 0.5 will mask half of the characters in the string.

required

Returns:

Name Type Description
str str

The masked version of the input string. If size_anonymization is set such that no characters are masked, the original string will be returned.

Source code in src/anonymizer_data/handlers/functions.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
@MaskDispatch.register("string")
def anonymize_string(value: str, size_anonymization: float, **kwargs: Any) -> str:
    """
    Anonymize a string by masking a portion of it.

    The number of masked characters is ``int(len(value) * size_anonymization)``,
    capped at the length of the string. A one-character string is masked entirely
    for any non-zero ratio. A positive ratio masks characters from the start of the
    string; a negative ratio masks them from the end.

    Parameters:
        value (str): The original string to be anonymized.
        size_anonymization (float): The proportion of the string to mask, normally
            between 0 and 1. For example, 0.5 masks the first half of the string.
        **kwargs (Any): ``mask_char`` overrides ``Config.default_mask_char``.

    Returns:
        str: The masked string. When the ratio is 0, or too small to cover a single
        character, the original string is returned unchanged.
    """
    if size_anonymization == 0:
        return value

    mask_char = kwargs.get("mask_char", Config.default_mask_char)
    length = len(value)
    total_to_mask = 1 if length == 1 else int(length * size_anonymization)

    if total_to_mask == 0:
        # The ratio rounds down to nothing. Without this guard the whole string
        # would be selected and replaced by zero mask characters (i.e. erased).
        return value

    # Never emit more mask characters than the string has.
    masked_count = min(abs(total_to_mask), length)
    mask = mask_char * masked_count

    # Slice by position instead of searching for the text, so a repeated fragment
    # elsewhere in the string cannot be masked by mistake.
    if total_to_mask > 0:
        return mask + value[masked_count:]
    return value[: length - masked_count] + mask

anonymize_substring(main_text, substring, occurrences=1, **kwargs)

Anonymize a specified substring in the main text by replacing it with asterisks.

This function searches for the given substring within the main text and replaces it with asterisks. The number of occurrences to replace can be specified.

Parameters:

Name Type Description Default
main_text str

The original text in which the substring will be anonymized.

required
substring str

The substring to be replaced with asterisks.

required
occurrences Optional[int]

The number of times to replace the substring with asterisks (default is 1).

1

Returns:

Name Type Description
str str

The modified text with the specified substring replaced by asterisks.

Source code in src/anonymizer_data/handlers/functions.py
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
def anonymize_substring(
    main_text: str, substring: str, occurrences: int = 1, **kwargs: Any
) -> str:
    """
    Replace occurrences of a substring in a text with mask characters.

    The text is converted with ``str()`` and searched from left to right; each
    replaced occurrence becomes one mask character per character of ``substring``.

    Parameters:
        main_text (str): The original text in which the substring will be anonymized.
        substring (str): The literal substring to be masked.
        occurrences (int): Passed to ``re.sub`` as ``count``: the maximum number of
            occurrences to replace (default is 1); 0 replaces every occurrence.
        **kwargs (Any): ``mask_char`` overrides ``Config.default_mask_char``.

    Returns:
        str: The text with the selected occurrences masked.
    """
    # Only fall back to the configured default when no override was supplied.
    mask_char = (
        kwargs["mask_char"] if "mask_char" in kwargs else Config.default_mask_char
    )
    mask = mask_char * len(substring)

    # A callable replacement is inserted literally. A plain string would be parsed
    # as a regex template, so masks such as "\\" or r"\1" would raise re.error.
    return re.sub(
        re.escape(substring), lambda _match: mask, str(main_text), count=occurrences
    )

mask_string_part(string, start, end, occurrences=1, **kwargs)

Mask a specific part of a string with asterisks.

This function replaces a substring of the provided string, defined by the start and end indices, with asterisks. The number of occurrences to replace can be specified.

Parameters:

Name Type Description Default
string str

The original string in which the substring will be masked.

required
start int

The starting index of the substring to be masked.

required
end int

The ending index of the substring to be masked.

required
occurrences Optional[int]

The number of times to replace the substring with asterisks (default is 1).

1

Returns:

Name Type Description
str str

The modified string with the specified substring replaced by asterisks.

Source code in src/anonymizer_data/handlers/functions.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def mask_string_part(
    string: str, start: int, end: int, occurrences: int = 1, **kwargs: Any
) -> str:
    """
    Mask the slice ``string[start:end]`` with mask characters.

    The slice is masked at its own position, one mask character per character
    actually covered. When ``occurrences`` asks for more than one replacement, other
    occurrences of the same text are masked as well, leftmost first.

    Parameters:
        string (str): The original string in which the slice will be masked.
        start (int): The starting index of the slice to be masked.
        end (int): The ending index (exclusive) of the slice to be masked.
        occurrences (int): The maximum number of occurrences of the slice text to
            mask, counting the slice itself (default is 1); 0 masks every
            occurrence. A negative value masks nothing.
        **kwargs (Any): ``mask_char`` overrides ``Config.default_mask_char``.

    Returns:
        str: The string with the slice (and any extra occurrences) masked. The string
        is returned unchanged when the slice is empty.
    """
    # Only fall back to the configured default when no override was supplied.
    mask_char = (
        kwargs["mask_char"] if "mask_char" in kwargs else Config.default_mask_char
    )

    # Resolve negative / out-of-range indices exactly the way slicing does.
    lo, hi, _ = slice(start, end).indices(len(string))
    if hi <= lo or occurrences < 0:
        return string

    segment = string[lo:hi]
    # Sized from the real slice so the output keeps the input's length.
    mask = mask_char * len(segment)
    head, tail = string[:lo], string[hi:]

    if occurrences == 1:
        return head + mask + tail
    if occurrences == 0:
        return head.replace(segment, mask) + mask + tail.replace(segment, mask)

    # Spend the remaining replacements on the other occurrences, leftmost first.
    extra = occurrences - 1
    in_head = min(extra, head.count(segment))
    return (
        head.replace(segment, mask, in_head)
        + mask
        + tail.replace(segment, mask, extra - in_head)
    )