Skip to content

Security

PII detection and redaction utilities.

PII Scanner

agentprobe.security.pii

PII detection and redaction utilities.

Provides pattern-based scanning for common PII types (email, phone, SSN, credit card, IP address) with scan and redact operations.

PIIMatch

Bases: BaseModel

A single PII detection match.

Attributes:

Name Type Description
pii_type str

Category of PII detected.

value str

The matched text.

start int

Start index in the source text.

end int

End index in the source text.

Source code in src/agentprobe/security/pii.py
class PIIMatch(BaseModel):
    """A single PII detection match.

    Immutable record describing one span of scanned text that matched a PII
    pattern.

    Attributes:
        pii_type: Category of PII detected.
        value: The matched text.
        start: Start index in the source text (validated as >= 0).
        end: End index in the source text (validated as >= 0).
    """

    # strict=True: no implicit type coercion on input values.
    # frozen=True: instances cannot be mutated after creation.
    # extra="forbid": unknown fields are rejected instead of silently ignored.
    model_config = ConfigDict(strict=True, frozen=True, extra="forbid")

    pii_type: str
    value: str
    # Both offsets are only checked for being non-negative; the model itself
    # does not verify that start <= end.
    start: int = Field(ge=0)
    end: int = Field(ge=0)

PIIRedactor

Scans text for PII and optionally redacts matches.

Supports configurable PII types and custom patterns.

Attributes:

Name Type Description
enabled_types

Set of PII type names to detect.

Source code in src/agentprobe/security/pii.py
class PIIRedactor:
    """Scans text for PII and optionally redacts matches.

    Supports configurable PII types and custom patterns. Detection is purely
    regex based: every active pattern is run over the input and each hit is
    reported as a ``PIIMatch``.

    The active patterns (selected built-ins plus any custom patterns) are kept
    in the private ``_patterns`` mapping of PII type name to compiled regex.
    """

    def __init__(
        self,
        enabled_types: set[str] | None = None,
        custom_patterns: dict[str, re.Pattern[str]] | None = None,
    ) -> None:
        """Initialize the PII redactor.

        Args:
            enabled_types: PII types to enable. None enables all built-in types.
                Names that are not built-in types are ignored.
            custom_patterns: Additional named patterns to check. A custom
                pattern whose name equals an enabled built-in type replaces
                that built-in pattern.
        """
        self._patterns: dict[str, re.Pattern[str]] = {}

        if enabled_types is None:
            self._patterns.update(_PII_PATTERNS)
        else:
            for pii_type in enabled_types:
                if pii_type in _PII_PATTERNS:
                    self._patterns[pii_type] = _PII_PATTERNS[pii_type]

        # Applied last so that caller-supplied patterns win on name clashes.
        if custom_patterns:
            self._patterns.update(custom_patterns)

    def scan(self, text: str) -> list[PIIMatch]:
        """Scan text for PII matches.

        Matches produced by different patterns may overlap; they are all
        reported as-is.

        Args:
            text: The text to scan.

        Returns:
            List of PII matches found, sorted by start position.
        """
        matches: list[PIIMatch] = []

        for pii_type, pattern in self._patterns.items():
            matches.extend(
                PIIMatch(
                    pii_type=pii_type,
                    value=match.group(),
                    start=match.start(),
                    end=match.end(),
                )
                for match in pattern.finditer(text)
            )

        # list.sort is stable, so matches sharing a start keep pattern order.
        matches.sort(key=lambda m: m.start)
        return matches

    def redact(self, text: str) -> str:
        """Redact all detected PII from text.

        Replaces each match with a type-specific label (e.g. [EMAIL]).
        Matches that overlap one another are merged into a single redacted
        span covering their union, labelled after the widest match in the
        group (the earliest one on ties). Non-overlapping matches are each
        replaced individually.

        Args:
            text: The text to redact.

        Returns:
            Text with PII replaced by labels.
        """
        matches = self.scan(text)
        if not matches:
            return text

        # Collapse overlapping matches first. Splicing labels one match at a
        # time is only correct for disjoint spans: with overlaps, offsets taken
        # from the original text no longer line up after the first splice,
        # which garbles the output and can leave PII fragments in place.
        spans: list[tuple[int, int, str]] = []
        widest = 0
        for match in matches:
            width = match.end - match.start
            if spans and match.start < spans[-1][1]:
                start, end, pii_type = spans[-1]
                if width > widest:
                    pii_type, widest = match.pii_type, width
                spans[-1] = (start, max(end, match.end), pii_type)
            else:
                spans.append((match.start, match.end, match.pii_type))
                widest = width

        # Rebuild the text left to right from untouched slices and labels.
        pieces: list[str] = []
        cursor = 0
        for start, end, pii_type in spans:
            pieces.append(text[cursor:start])
            pieces.append(_REDACTION_LABELS.get(pii_type, f"[{pii_type.upper()}]"))
            cursor = end
        pieces.append(text[cursor:])

        return "".join(pieces)

    def has_pii(self, text: str) -> bool:
        """Check if text contains any detectable PII.

        Args:
            text: The text to check.

        Returns:
            True if PII was detected.
        """
        return len(self.scan(text)) > 0

__init__(enabled_types=None, custom_patterns=None)

Initialize the PII redactor.

Parameters:

Name Type Description Default
enabled_types set[str] | None

PII types to enable. None enables all built-in types.

None
custom_patterns dict[str, Pattern[str]] | None

Additional named patterns to check.

None
Source code in src/agentprobe/security/pii.py
def __init__(
    self,
    enabled_types: set[str] | None = None,
    custom_patterns: dict[str, re.Pattern[str]] | None = None,
) -> None:
    """Set up the pattern table used for scanning.

    Args:
        enabled_types: Names of built-in PII types to activate. ``None``
            activates every built-in type; names that are not built-in
            types are skipped.
        custom_patterns: Extra named patterns, applied after the built-ins
            so they take precedence on name clashes.
    """
    selected: dict[str, re.Pattern[str]]
    if enabled_types is None:
        selected = dict(_PII_PATTERNS)
    else:
        selected = {
            name: _PII_PATTERNS[name]
            for name in enabled_types
            if name in _PII_PATTERNS
        }

    if custom_patterns:
        selected.update(custom_patterns)

    self._patterns = selected

scan(text)

Scan text for PII matches.

Parameters:

Name Type Description Default
text str

The text to scan.

required

Returns:

Type Description
list[PIIMatch]

List of PII matches found, sorted by position.

Source code in src/agentprobe/security/pii.py
def scan(self, text: str) -> list[PIIMatch]:
    """Find every PII occurrence in ``text``.

    Each active pattern is run over the whole text; hits from different
    patterns are reported independently, even when they overlap.

    Args:
        text: The text to scan.

    Returns:
        The PII matches found, ordered by start position.
    """
    found = [
        PIIMatch(
            pii_type=name,
            value=hit.group(),
            start=hit.start(),
            end=hit.end(),
        )
        for name, regex in self._patterns.items()
        for hit in regex.finditer(text)
    ]
    # sorted() is stable: equal starts keep their pattern iteration order.
    return sorted(found, key=lambda item: item.start)

redact(text)

Redact all detected PII from text.

Replaces each match with a type-specific label (e.g. [EMAIL]).

Parameters:

Name Type Description Default
text str

The text to redact.

required

Returns:

Type Description
str

Text with PII replaced by labels.

Source code in src/agentprobe/security/pii.py
def redact(self, text: str) -> str:
    """Redact all detected PII from text.

    Replaces each match with a type-specific label (e.g. [EMAIL]).
    Matches that overlap one another are merged into a single redacted span
    covering their union, labelled after the widest match in the group (the
    earliest one on ties). Non-overlapping matches are each replaced
    individually.

    Args:
        text: The text to redact.

    Returns:
        Text with PII replaced by labels.
    """
    matches = self.scan(text)
    if not matches:
        return text

    # Collapse overlapping matches first. Splicing labels one match at a time
    # is only correct for disjoint spans: with overlaps, offsets taken from
    # the original text no longer line up after the first splice, which
    # garbles the output and can leave PII fragments in place.
    spans: list[tuple[int, int, str]] = []
    widest = 0
    for match in matches:
        width = match.end - match.start
        if spans and match.start < spans[-1][1]:
            start, end, pii_type = spans[-1]
            if width > widest:
                pii_type, widest = match.pii_type, width
            spans[-1] = (start, max(end, match.end), pii_type)
        else:
            spans.append((match.start, match.end, match.pii_type))
            widest = width

    # Rebuild the text left to right from untouched slices and labels.
    pieces: list[str] = []
    cursor = 0
    for start, end, pii_type in spans:
        pieces.append(text[cursor:start])
        pieces.append(_REDACTION_LABELS.get(pii_type, f"[{pii_type.upper()}]"))
        cursor = end
    pieces.append(text[cursor:])

    return "".join(pieces)

has_pii(text)

Check if text contains any detectable PII.

Parameters:

Name Type Description Default
text str

The text to check.

required

Returns:

Type Description
bool

True if PII was detected.

Source code in src/agentprobe/security/pii.py
def has_pii(self, text: str) -> bool:
    """Report whether ``text`` contains at least one PII match.

    Delegates to ``scan`` and checks whether it found anything.

    Args:
        text: The text to check.

    Returns:
        True if PII was detected.
    """
    found = self.scan(text)
    return len(found) != 0