Skip to content

Core

MaskBase

Bases: ABC

Source code in src/anonymizer_data/core/base.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
class MaskBase[T](ABC):
    _allowed_type: type

    def __init__(self, value: T) -> None:
        if not self.check_value(value):
            raise ValueError(f"Value {value} is not valid")

        self._value: T = value
        self._value_anonymized: T | None = None

    def check_value(self, value: T) -> bool:
        return isinstance(value, self._allowed_type)

    def view(self) -> T:
        return self._value

    def anonymize(self) -> T:
        """Returns and persists the anonymized value"""
        if self._value_anonymized is None:
            self._value_anonymized = self._anonymize(self._value)
        return self._value_anonymized

    @abstractmethod
    def _anonymize(self, value: T) -> T:
        pass

    def __str__(self) -> str:
        return str(self._value_anonymized or self._value)

    def __len__(self) -> int:
        return len(self._value_anonymized or self._value)  # type: ignore

    def __iter__(self) -> Any:
        return iter(self._value_anonymized or self._value)  # type: ignore

anonymize()

Returns and persists the anonymized value

Source code in src/anonymizer_data/core/base.py
21
22
23
24
25
def anonymize(self) -> T:
    """Return the anonymized value, computing and caching it on first use."""
    cached = self._value_anonymized
    if cached is None:
        # Nothing cached yet: compute from the original value and persist it.
        cached = self._anonymize(self._value)
        self._value_anonymized = cached
    return cached

MaskDict

Bases: MaskBase[DataDict]

Source code in src/anonymizer_data/core/dict.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
class MaskDict(MaskBase[DataDict]):
    """
    Masker for dictionaries; the anonymization is delegated to a strategy.

    The strategy is chosen once in ``__init__`` (see ``_get_strategy``) and
    can be replaced afterwards with ``with_keys``. Extra keyword arguments
    are stored and forwarded to whichever strategy is built.

    Raises:
        ValueError: Value {value} is not valid.
    """

    _allowed_type = dict

    def __init__(
        self,
        value: DataDict,
        key_with_type_mask: bool = False,
        selected_keys: list[str] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(value)
        # Kept so that ``with_keys`` can build a new strategy with the same options.
        self._extra = kwargs
        self._strategy: DictAnonymizationStrategy = self._get_strategy(
            key_with_type_mask, selected_keys, **kwargs
        )

    def _get_strategy(
        self,
        key_with_type_mask: bool,
        selected_keys: list[str] | None,
        **kwargs: Any,
    ) -> DictAnonymizationStrategy:
        """Pick the strategy: key-as-type-mask first, then key-based, else default.

        ``key_with_type_mask`` takes precedence over ``selected_keys``; an empty
        or missing ``selected_keys`` falls through to the default strategy.
        """
        # Imported here rather than at module level (the dispatcher imports this module).
        from .dispatcher import dispatch_value_mask

        if key_with_type_mask:
            return KeyAsTypeMaskDictAnonymizationStrategy(dispatch_value_mask, **kwargs)
        if selected_keys:
            return KeyBasedDictAnonymizationStrategy(
                selected_keys, dispatch_value_mask, **kwargs
            )
        return DefaultDictAnonymizationStrategy(dispatch_value_mask, **kwargs)

    def with_keys(self, keys: list[str]) -> "MaskDict":
        """Reconfigures the dictionary mask to use only the specified keys."""
        from .dispatcher import dispatch_value_mask

        # NOTE(review): an anonymized result that is already cached is not
        # cleared here, so calling this after ``anonymize()`` does not change
        # what ``anonymize()`` returns. Confirm whether that is intended.
        self._strategy = KeyBasedDictAnonymizationStrategy(
            keys, dispatch_value_mask, **self._extra
        )
        return self

    def _anonymize(self, value: DataDict) -> DataDict:
        return self._strategy.anonymize(value)

    def _active_dict(self) -> DataDict:
        """Return the anonymized dict when one is cached, else the original.

        Uses an ``is None`` check instead of truthiness so that an empty
        anonymized dict never falls back to the original, un-anonymized data.
        """
        anonymized = self._value_anonymized
        return self._value if anonymized is None else anonymized

    @property
    def __dict__(self) -> DataDict:  # type: ignore
        return self._active_dict()

    def __getitem__(self, key: str) -> Any:
        return self._active_dict()[key]

    def __iter__(self):
        # Iterates over (key, value) pairs, not over keys alone.
        return iter(self._active_dict().items())

with_keys(keys)

Reconfigures the dictionary mask to use only the specified keys.

Source code in src/anonymizer_data/core/dict.py
46
47
48
49
50
51
52
53
def with_keys(self, keys: list[str]) -> "MaskDict":
    """Switch this mask to a key-based strategy limited to ``keys``; returns self."""
    from .dispatcher import dispatch_value_mask as value_masker

    # Build the replacement strategy with the keyword options stored on the instance.
    key_strategy = KeyBasedDictAnonymizationStrategy(
        keys, value_masker, **self._extra
    )
    self._strategy = key_strategy
    return self

MaskList

Bases: MaskBase[list[T]]

This class anonymizes data contained in lists. Just like MaskDict, it can be data of type str, dict or list.

Attributes:

Name Type Description
value list

The list to anonymize.

type_mask Optional[str]

The type mask to anonymize. Default is "string".

string_masker bool

If false the string will never be anonymized. default is True.

size_anonymization float

The size of the anonymized string.

Note

The "size_anonymization" parameter, like the other keyword arguments, is passed to MaskStr for each string contained in "value". Keep in mind that an invalid value may therefore raise a ValueError only when the "anonymize" method is called.

Examples:

>>> from anonymizer_data.core import MaskList
>>> mask_list = MaskList(["Hello world", "Hello Python"])
>>> print(mask_list)
['Hello world', 'Hello Python']
>>> mask_list.anonymize()
['*******orld', '********thon']
>>> mask_list = MaskList(["Hello world", "Hello Python"], size_anonymization=0.5)  # anonymizing by half
>>> print(mask_list.anonymize())
['***** world', '******Python']
>>> mask_list.view()  # View original list
['Hello world', 'Hello Python']

Raises:

Type Description
ValueError

Value {value} is not valid.

Source code in src/anonymizer_data/core/list.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class MaskList[T](MaskBase[list[T]]):
    """
    This class anonymizes data contained in lists. Just like `MaskDict`, it can be data of type `str`, `dict` or `list`.

    Attributes:
        value (str): The string to anonymize.
        type_mask (Optional[str]): The type mask to anonymize. Default is "string".
        string_masker (bool): If false the string will never be anonymized. default is True.
        size_anonymization (float): The size of the anonymized string.

    Note:
        The "size_anonymization" parameter will be passed to MaskStr for each string contained in "value" as well as
        the other parameters, keeping this in mind be aware that if you pass an invalid value a ValueError may occur
        when calling the "anonymize" method.

    Examples:
        >>> from anonymizer_data.core import MaskList
        >>> mask_list = MaskList(["Hello world", "Hello Python"])
        >>> print(mask_list)
        ["Hello world", "Hello Python"]
        >>> mask_list.anonymize()
        ['*******orld', '********thon']
        >>> mask_list = MaskList(["Hello world", "Hello Python"], size_anonymization=0.5)  # anonymizing by half
        >>> print(mask_list.anonymize())
        ['***** world', '******Python']
        >>> mask_list.view()  # View original list
        ["Hello world", "Hello Python"]

    Raises:
        ValueError: Value {value} is not valid.
    """

    _allowed_type = list

    def __init__(self, value: list[T], **kwargs: Any) -> None:
        super().__init__(value)

        self._extra: dict[str, Any] = kwargs

    def _anonymize(self, value: list) -> list:
        from .dispatcher import dispatch_value_mask

        return [dispatch_value_mask(item, **self._extra) for item in value]

    @property
    def __list__(self) -> list:
        return self._value_anonymized or self._value

    def __getitem__(self, index: int) -> T:
        value_list = self._value_anonymized or self._value
        return value_list[index]

    def __eq__(self, other: object) -> bool:
        value_compare = self._value_anonymized or self._value
        if isinstance(other, list):
            return value_compare == other
        elif isinstance(other, MaskList):
            return value_compare == list(other)
        return False

MaskStr

Bases: MaskBase[str]

Class to anonymize strings.

Attributes:

Name Type Description
value str

The string to anonymize.

type_mask Optional[str]

The type mask to anonymize. Default is "string".

anonymize_string Optional[bool]

If false the string will never be anonymized. default is True.

size_anonymization Optional[float]

The size of the anonymized string.

string_masker Optional[MaskDispatch]

Dispatcher of the string to anonymize.

Examples:

>>> string = MaskStr("Hello world")
>>> print(string)
Hello world
>>> string.anonymize()
'*******orld'
>>> print(string)
*******orld
>>> string.view()  # View original string
'Hello world'

Raises:

Type Description
ValueError

The 'size_anonymization' field must be between 0 and 1.

ValueError

The 'size_anonymization' must be a float.

ValueError

Value {value} is not valid.

Source code in src/anonymizer_data/core/string.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
class MaskStr(MaskBase[str]):
    """
    Masker for a single string.

    Attributes:
        value (str): The string to anonymize.
        type_mask (Optional[str]): Name of the mask handed to the string masker. Default is "string".
        anonymize_string (Optional[bool]): If False, ``anonymize`` returns the string unchanged. Default is True.
        size_anonymization (Optional[float]): The size of the anonymized string. When it is not
            given and ``type_mask`` is the default, 0.7 is used.
        string_masker (Optional[MaskDispatch]): Dispatcher of the string to anonymize; a new
            ``MaskDispatch()`` is created when none is given.

    Examples:
        >>> string = MaskStr("Hello world")
        >>> print(string)
        Hello world
        >>> string.anonymize()
        '*******orld'
        >>> print(string)
        *******orld
        >>> string.view()  # View original string
        'Hello world'

    Raises:
        ValueError: The 'size_anonymization' field must be between 0 and 1.
        ValueError: The 'size_anonymization' must be a float.
        ValueError: Value {value} is not valid.
    """

    _allowed_type = str
    _type_mask_default: str = "string"

    def __init__(
        self,
        value: str,
        type_mask: str | None = None,
        anonymize_string: bool = True,
        string_masker: MaskDispatch | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(value)

        self._type_mask: str = type_mask or self._type_mask_default
        self._string_masker: MaskDispatch = string_masker or MaskDispatch()
        self.__anonymize_string: bool = anonymize_string

        # Work on a copy so the default inserted below never leaks into the caller's dict.
        self._extra: dict[str, Any] = kwargs.copy()

        if "size_anonymization" not in self._extra:
            if self._type_mask != self._type_mask_default:
                # Non-default masks get no implicit size and nothing to validate.
                return
            self._extra["size_anonymization"] = 0.7
        self._validate_size_anonymization(self._extra["size_anonymization"])

    def _anonymize(self, value: str) -> str:
        if self.__anonymize_string:
            return self._string_masker.mask(self._type_mask, value, **self._extra)
        return value

    @staticmethod
    def _validate_size_anonymization(size_anonymization: float) -> None:
        """Validate ``size_anonymization``: a float whose rounded magnitude is in (0, 1].

        The value is rounded to one decimal place and its absolute value is
        checked, so negative values within range are accepted.

        NOTE(review): because of the rounding, 1.04 is accepted and 0.04 is
        rejected. Confirm whether this tolerance is intentional.
        """
        if not isinstance(size_anonymization, float):
            raise ValueError("The 'size_anonymization' must be a float.")

        magnitude = abs(round(size_anonymization, 1))
        if not 0 < magnitude <= 1:
            raise ValueError("The 'size_anonymization' field must be between 0 and 1.")

dispatch_value_mask(value, **extra)

Factory that contains the logic for choosing the correct masker for each type of data.

Source code in src/anonymizer_data/core/dispatcher.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def dispatch_value_mask(value: Any, **extra: Any) -> Masker:
    """Anonymize ``value`` with the masker registered for its type name.

    The built-in ``list``/``dict``/``str`` maskers are (re)registered in
    ``DEFAULT_MASKERS`` on every call, then the masker is looked up by
    ``type(value).__name__``. When no masker is registered for that name, the
    value is converted with ``str()`` and masked by ``MaskStr`` if a truthy
    ``type_mask`` was passed; otherwise it is returned unchanged.
    """
    # Local imports: these modules import the dispatcher themselves.
    from .list import MaskList
    from .string import MaskStr
    from .dict import MaskDict

    def _mask_list(value, **kwargs):
        return MaskList(value, **kwargs).anonymize()

    def _mask_dict(value, **kwargs):
        return MaskDict(value, **kwargs).anonymize()

    def _mask_str(value, **kwargs):
        return MaskStr(value, **kwargs).anonymize()

    builtin_maskers = {"list": _mask_list, "dict": _mask_dict, "str": _mask_str}
    DEFAULT_MASKERS.update(builtin_maskers)

    factory = DEFAULT_MASKERS.get(type(value).__name__)
    if not factory:
        if extra.get("type_mask"):
            # An explicit mask was requested for an unregistered type: mask its string form.
            return MaskStr(str(value), **extra).anonymize()
        return value

    return factory(value, **extra)