Python: Data validation
thoroc
Posted on January 20, 2023
NOTE: This was written in late 2022 with py3.9 in mind, so visitors of the future should take the following with a pinch of salt.
When a Python script deals with JSON messages as part of an API, you need to validate the incoming data, and sometimes you might want to validate the outgoing data as well.
The present post will try to present several different ways to do this. First of all we need to define what the structure of the data looks like. In a typical business context it could look like the following:
- User object containing: first name, last name, date of birth (must be 18 years old or older), contact (optional) and address (optional)
- Contact object contains: email address (optional — must be valid) and phone number (optional — must be a valid UK phone number)
- Address object contains: street address (should include house number), city, postcode (must be a valid UK postcode) and country (must be one of England, Wales, Scotland & Northern Ireland)
Before we start trying to validate anything, we need to generate some data and leverage some existing libraries to validate things like postcodes, email addresses and phone numbers; doing that by hand reliably using regex is notoriously challenging.
Some preliminary checks
Let’s also use the Faker library which is a powerful way to generate all sorts of data:
from dateutil.relativedelta import relativedelta
from datetime import date, datetime
from typing import Union
from faker import Faker
fake = Faker("en_GB")
Testing some of the logic we will reuse for validation
Validating a date of birth as a string
def is_underaged(date_of_birth: str, years: int = 18) -> bool:
    """Tell whether a person born on ``date_of_birth`` is younger than ``years``.

    Args:
        date_of_birth: Date of birth formatted as ``YYYY-MM-DD``.
        years: Minimum age, in whole years, required to count as an adult.

    Returns:
        ``True`` when the person has not reached ``years`` years of age yet,
        ``False`` otherwise (someone turning ``years`` today is not underaged).

    Raises:
        ValueError: If ``date_of_birth`` does not match ``YYYY-MM-DD``.
    """
    dob = datetime.strptime(date_of_birth, "%Y-%m-%d").date()
    # Latest date of birth that still satisfies the minimum age.
    cutoff = date.today() - relativedelta(years=years)
    return dob > cutoff
is_underaged("1992-04-01")
Output:
False
Validating a phone number using phonenumbers
import phonenumbers
my_number = fake.phone_number()
def validate_uk_phone_number(phone_number: str) -> bool:
    """Return whether ``phone_number`` is a valid number for region ``GB``.

    Args:
        phone_number: The phone number to check, in national or
            international notation.

    Returns:
        ``True`` if the number parses and ``phonenumbers`` considers it valid,
        ``False`` otherwise. This function does not raise for bad input.
    """
    if not isinstance(phone_number, str):
        return False
    try:
        parsed = phonenumbers.parse(phone_number, "GB")
    except phonenumbers.NumberParseException:
        # Only parsing failures mean "invalid"; a bare ``except`` would also
        # hide programming errors and KeyboardInterrupt.
        return False
    return phonenumbers.is_valid_number(parsed)
print(my_number, validate_uk_phone_number(my_number))
Output:
+44808 1570733 True
Validating an email address using validate_email
from validate_email import validate_email
email_address = fake.free_email()
is_valid = validate_email(email_address=email_address)
print(email_address, is_valid)
Output:
eur.olc.protection.outlook.com resolve error: The DNS response does not contain an answer to the question: eur.olc.protection.outlook.com. IN AAAA
patrickwright@hotmail.co.uk False
Validating an email address using email_validator
from email_validator import validate_email, EmailNotValidError
def validate_email_address(email_address: str, is_new_account: bool = True) -> bool:
    """Validate ``email_address`` with ``email_validator``.

    Args:
        email_address: The address to validate.
        is_new_account: Passed as ``check_deliverability``; when true the
            deliverability of the address is checked as well.

    Returns:
        ``True`` when validation yields a non-empty address, ``False`` otherwise.

    Raises:
        EmailNotValidError: Propagated unchanged from ``validate_email`` when
            the address is rejected.
    """
    validation = validate_email(email_address, check_deliverability=is_new_account)
    return bool(validation.email)
valid_email_address = fake.free_email()
print(valid_email_address, validate_email_address(valid_email_address))
Output:
rgreen@hotmail.co.uk True
Validating postcode using postcode_validator_uk
from postcode_validator_uk.validators import UKPostcode
postcode = fake.postcode()
# NOTE(review): this only relies on UKPostcode(...) constructing successfully
# and the resulting object being truthy -- confirm against the library docs
# whether an explicit validation call is needed for the check to be effective.
validate_postcode = bool(UKPostcode(postcode))
print(postcode, validate_postcode)
Output:
W3 8WP True
Refresher about date manipulation
from datetime import datetime
dob = "1995-11-28"
datetime.strptime(dob, "%Y-%m-%d").date()
Output:
datetime.date(1995, 11, 28)
Faking a bunch of valid dates of birth
[
fake.date_between_dates(
date_start=date(1970, 1, 1),
date_end=(date.today() - relativedelta(years=18))
).strftime("%Y-%m-%d") for _ in range(10)
]
Output:
['1981-04-11',
'1971-02-23',
'1995-03-23',
'1993-01-18',
'2001-12-03',
'1975-03-24',
'1996-06-01',
'1972-05-29',
'1978-04-01',
'1988-09-22']
Creating a faker provider for the date of birth and the UK country
We don't want to repeat that logic too many times, so let's create a faker provider to hold that for us.
from faker.providers import BaseProvider
class CustomProvider(BaseProvider):
    """Faker provider producing dates of birth and UK home countries."""

    def date_of_birth(self, is_underaged: bool = False):
        """Return a random date of birth formatted as ``YYYY-MM-DD``.

        Args:
            is_underaged: When true the date is picked so that the person is
                younger than 18 today; otherwise the person is 18 or older and
                born on or after 1970-01-01.
        """
        # Someone born exactly 18 years ago is 18 today, so that day is the
        # last "adult" date and the following day is the first "underaged" one.
        adult_cutoff = date.today() - relativedelta(years=18)
        if is_underaged:
            start_date = adult_cutoff + relativedelta(days=1)
            end_date = date.today()
        else:
            start_date = date(1970, 1, 1)
            end_date = adult_cutoff
        dob = self.generator.date_between_dates(date_start=start_date, date_end=end_date)
        return dob.strftime("%Y-%m-%d")

    def uk_home_country(self):
        """Return one of the four UK home countries, picked at random."""
        return self.generator.random_element(
            elements=["England", "Wales", "Scotland", "Northern Ireland"]
        )
fake.add_provider(CustomProvider)
print(fake.date_of_birth())
print(fake.date_of_birth(is_underaged=True))
print(fake.uk_home_country())
Output:
1989-09-01
2009-06-07
Scotland
Common data
Let's create some data to test all the different solutions
# Four sample person records shared by every validation approach below.
data = [
# 1) mandatory fields only
{
"first_name": fake.first_name(),
"last_name": fake.last_name(),
"dob": fake.date_of_birth()
},
# 2) an empty contact object
{
"first_name": fake.first_name(),
"last_name": fake.last_name(),
"dob": fake.date_of_birth(),
"contact": {}
},
# 3) contact with a phone number only, plus a full address
{
"first_name": fake.first_name(),
"last_name": fake.last_name(),
"dob": fake.date_of_birth(),
"contact": {
"phone_number": fake.phone_number()
},
"address": {
"street": fake.street_address(),
"city": fake.city(),
"country": fake.uk_home_country(),
"postcode": fake.postcode()
}
},
# 4) contact with an email address only, plus a full address
{
"first_name": fake.first_name(),
"last_name": fake.last_name(),
"dob": fake.date_of_birth(),
"contact": {
"email_address": fake.free_email()
},
"address": {
"street": fake.street_address(),
"city": fake.city(),
"country": fake.uk_home_country(),
"postcode": fake.postcode()
}
},
]
data
Output:
[{'first_name': 'Gregory', 'last_name': 'Brooks', 'dob': '1996-11-18'},
{'first_name': 'Lydia',
'last_name': 'Lawrence',
'dob': '1973-12-25',
'contact': {}},
{'first_name': 'Timothy',
'last_name': 'Hilton',
'dob': '2002-03-10',
'contact': {'phone_number': '+44191 496 0419'},
'address': {'street': 'Studio 5\nTom union',
'city': 'North Carolineton',
'country': 'Wales',
'postcode': 'AL2R 0BL'}},
{'first_name': 'Kerry',
'last_name': 'Brown',
'dob': '1979-09-12',
'contact': {'email_address': 'angelaturner@hotmail.co.uk'},
'address': {'street': 'Flat 81\nSimmons courts',
'city': 'Marcton',
'country': 'England',
'postcode': 'IG6 9NH'}}]
Validating using the standard library
Create a helper class to dump the data
import json
class Serialiser:
    """Base class giving subclasses a JSON ``repr`` built from their attributes."""

    def __items(self):
        """Map attribute names to values, dropping one leading underscore from names."""
        return {
            (name[1:] if name.startswith("_") else name): value
            for name, value in self.__dict__.items()
        }

    def __repr__(self):
        def encode(obj):
            # json calls this for every object it cannot serialise natively,
            # starting with ``self``; such objects must provide ``__items``.
            return obj.__items()

        return json.dumps(self, default=encode)
class Example(Serialiser):
    """Minimal two-attribute object used to demonstrate ``Serialiser``."""

    def __init__(self, id, name):
        self.id, self.name = id, name
ex = Example(1, "foo")
ex
Output:
{"id": 1, "name": "foo"}
Using validation functions
This is probably the most basic solution out there
from typing import Optional
import json
import phonenumbers
from email_validator import validate_email
from postcode_validator_uk.validators import UKPostcode
class Contact(Serialiser):
    """Contact details validated by helper methods at construction time.

    An attribute is only set for a value that was actually provided (truthy),
    so a missing phone number or email address is absent from the instance.
    """

    def __init__(self, phone_number: Optional[str] = None, email_address: Optional[str] = None):
        if phone_number:
            self.phone_number = self._is_valid_uk_phone_number(phone_number)
        if email_address:
            self.email_address = self._is_valid_email_address(email_address)

    def _is_valid_uk_phone_number(self, value):
        """Return ``value`` if it is a valid GB phone number, else raise ``ValueError``."""
        parsed = phonenumbers.parse(value, "GB")
        if phonenumbers.is_valid_number(parsed):
            return value
        raise ValueError("Invalid UK phone number.")

    def _is_valid_email_address(self, value):
        """Return ``value`` if ``validate_email`` accepts it, else raise ``ValueError``."""
        result = validate_email(value, check_deliverability=True)
        if result.email:
            return value
        raise ValueError("Invalid Email address.")
class Address(Serialiser):
    """Postal address whose postcode and country are checked at construction time."""

    def __init__(self, street: str, postcode: str, country: str, city: str):
        self.street = street
        self.postcode = self._is_valid_uk_postcode(postcode)
        self.country = self._is_valid_home_country(country)
        self.city = city

    def _is_valid_uk_postcode(self, value):
        """Return ``value`` if ``UKPostcode(value)`` is truthy, else raise ``ValueError``."""
        # NOTE(review): confirm that constructing UKPostcode is enough to
        # reject an invalid postcode.
        if UKPostcode(value):
            return value
        raise ValueError("Invalid UK postcode.")

    def _is_valid_home_country(self, value):
        """Return ``value`` if it names a UK home country, else raise ``ValueError``."""
        if value in ["England", "Wales", "Scotland", "Northern Ireland"]:
            return value
        raise ValueError("Invalid home country")
class Person(Serialiser):
    """A person who must be at least 18 years old.

    ``contact`` and ``address`` are stored exactly as passed and only when
    truthy; this class does not validate or convert them itself.
    """

    def __init__(
        self,
        first_name: str,
        last_name: str,
        dob: str,
        contact: Optional[Contact] = None,
        address: Optional[Address] = None
    ):
        self.first_name = first_name
        self.last_name = last_name
        self.dob = self._legal_age(dob)
        if contact:
            self.contact = contact
        if address:
            self.address = address

    def _legal_age(self, value: str) -> str:
        """Return ``value`` if the ``YYYY-MM-DD`` date of birth is that of an adult.

        Raises:
            ValueError: If the person is younger than 18, or ``value`` is not
                formatted as ``YYYY-MM-DD``.
        """
        dob = datetime.strptime(value, "%Y-%m-%d").date()
        # A person born exactly 18 years ago is 18 today and must be accepted,
        # so only dates strictly after the cutoff are rejected.
        if dob > date.today() - relativedelta(years=18):
            raise ValueError("Underage.")
        return value
[Person(**d) for d in data]
Output:
[{"first_name": "Gregory", "last_name": "Brooks", "dob": "1996-11-18"},
{"first_name": "Lydia", "last_name": "Lawrence", "dob": "1973-12-25"},
{"first_name": "Timothy", "last_name": "Hilton", "dob": "2002-03-10", "contact": {"phone_number": "+44191 496 0419"}, "address": {"street": "Studio 5\nTom union", "city": "North Carolineton", "country": "Wales", "postcode": "AL2R 0BL"}},
{"first_name": "Kerry", "last_name": "Brown", "dob": "1979-09-12", "contact": {"email_address": "angelaturner@hotmail.co.uk"}, "address": {"street": "Flat 81\nSimmons courts", "city": "Marcton", "country": "England", "postcode": "IG6 9NH"}}]
Using property
NOTE: only the properties where validation is required are needed
from typing import Optional
import phonenumbers
from email_validator import validate_email
from postcode_validator_uk.validators import UKPostcode
class Contact(Serialiser):
    """Contact details validated through property setters.

    A value is only stored when provided, so a missing phone number or email
    address is absent from the instance ``__dict__``; the corresponding
    property then reads as ``None``.
    """

    def __init__(self, phone_number: Optional[str] = None, email_address: Optional[str] = None):
        # Assign through the properties (not the private attributes) so the
        # setters' validation also runs at construction time.
        if phone_number:
            self.phone_number = phone_number
        if email_address:
            self.email_address = email_address

    @property
    def phone_number(self):
        """The validated phone number, or ``None`` when none was set."""
        return getattr(self, "_phone_number", None)

    @phone_number.setter
    def phone_number(self, value):
        if not phonenumbers.is_valid_number(phonenumbers.parse(value, "GB")):
            raise ValueError("Invalid UK phone number.")
        self._phone_number = value

    @property
    def email_address(self):
        """The validated email address, or ``None`` when none was set."""
        return getattr(self, "_email_address", None)

    @email_address.setter
    def email_address(self, value):
        if not validate_email(value, check_deliverability=True).email:
            raise ValueError("Invalid Email address.")
        self._email_address = value
class Address(Serialiser):
    """Postal address whose postcode and country are validated on assignment."""

    def __init__(self, street: str, postcode: str, country: str, city: str):
        # Assign through the properties (not the private attributes) so the
        # setters' validation also runs at construction time.
        self.street = street
        self.postcode = postcode
        self.country = country
        self.city = city

    @property
    def street(self):
        return self._street

    @street.setter
    def street(self, value):
        self._street = value

    @property
    def postcode(self):
        return self._postcode

    @postcode.setter
    def postcode(self, value):
        # NOTE(review): confirm that constructing UKPostcode is enough to
        # reject an invalid postcode.
        if not UKPostcode(value):
            raise ValueError("Invalid UK postcode.")
        self._postcode = value

    @property
    def country(self):
        return self._country

    @country.setter
    def country(self, value):
        if value not in ["England", "Wales", "Scotland", "Northern Ireland"]:
            raise ValueError("Invalid home country")
        self._country = value

    @property
    def city(self):
        return self._city

    @city.setter
    def city(self, value):
        self._city = value
class Person(Serialiser):
    """A person whose date of birth is validated through a property setter.

    ``contact`` and ``address`` are stored exactly as passed and only when
    truthy; this class does not validate or convert them itself.
    """

    def __init__(
        self,
        first_name: str,
        last_name: str,
        dob: str,
        contact: Optional[Contact] = None,
        address: Optional[Address] = None
    ):
        # Assign through the properties (not the private attributes) so the
        # ``dob`` setter's age check also runs at construction time.
        self.first_name = first_name
        self.last_name = last_name
        self.dob = dob
        if contact:
            self.contact = contact
        if address:
            self.address = address

    @property
    def first_name(self):
        return self._first_name

    @first_name.setter
    def first_name(self, value):
        self._first_name = value

    @property
    def last_name(self):
        return self._last_name

    @last_name.setter
    def last_name(self, value):
        self._last_name = value

    @property
    def dob(self):
        return self._dob

    @dob.setter
    def dob(self, value):
        born = datetime.strptime(value, "%Y-%m-%d").date()
        # A person born exactly 18 years ago is 18 today and must be accepted.
        if born > date.today() - relativedelta(years=18):
            raise ValueError("Underage.")
        self._dob = value
[Person(**d) for d in data]
Output:
[{"first_name": "Gregory", "last_name": "Brooks", "dob": "1996-11-18"},
{"first_name": "Lydia", "last_name": "Lawrence", "dob": "1973-12-25"},
{"first_name": "Timothy", "last_name": "Hilton", "dob": "2002-03-10", "contact": {"phone_number": "+44191 496 0419"}, "address": {"street": "Studio 5\nTom union", "city": "North Carolineton", "country": "Wales", "postcode": "AL2R 0BL"}},
{"first_name": "Kerry", "last_name": "Brown", "dob": "1979-09-12", "contact": {"email_address": "angelaturner@hotmail.co.uk"}, "address": {"street": "Flat 81\nSimmons courts", "city": "Marcton", "country": "England", "postcode": "IG6 9NH"}}]
Using Descriptors
import phonenumbers
from email_validator import validate_email
from postcode_validator_uk.validators import UKPostcode
class EmailAddress:
    """Descriptor validating an optional email address on assignment.

    The value is kept in the owning instance's ``__dict__`` so every instance
    has its own value, instead of all instances sharing one value stored on
    the descriptor. ``None`` means "not provided" and is not validated.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        # Called at class creation with the attribute name the descriptor
        # is bound to.
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self  # accessed on the class itself
        return obj.__dict__.get(self.name)

    def __set__(self, obj, value):
        if value is None:
            obj.__dict__.pop(self.name, None)
            return
        if not validate_email(value, check_deliverability=True).email:
            raise ValueError("Invalid Email address.")
        obj.__dict__[self.name] = value
class UKPhoneNumber:
    """Descriptor validating an optional GB phone number on assignment.

    The value is kept in the owning instance's ``__dict__`` so every instance
    has its own value, instead of all instances sharing one value stored on
    the descriptor. ``None`` means "not provided" and is not validated.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        # Called at class creation with the attribute name the descriptor
        # is bound to.
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self  # accessed on the class itself
        return obj.__dict__.get(self.name)

    def __set__(self, obj, value):
        if value is None:
            obj.__dict__.pop(self.name, None)
            return
        if not phonenumbers.is_valid_number(phonenumbers.parse(value, "GB")):
            raise ValueError("Invalid UK phone number.")
        obj.__dict__[self.name] = value
class Contact(Serialiser):
    """Contact details whose fields are validated by class-level descriptors."""

    email_address = EmailAddress()
    phone_number = UKPhoneNumber()

    # Both values default to None, so they are annotated as Optional
    # explicitly rather than relying on an implicit Optional.
    def __init__(self, phone_number: Optional[str] = None, email_address: Optional[str] = None):
        self.phone_number = phone_number
        self.email_address = email_address
class UKPostcode:
    """Descriptor checking a UK postcode on assignment.

    The value is kept in the owning instance's ``__dict__`` so every instance
    has its own value, instead of all instances sharing one value stored on
    the descriptor.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        # Called at class creation with the attribute name the descriptor
        # is bound to.
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self  # accessed on the class itself
        return obj.__dict__.get(self.name)

    def __set__(self, obj, value):
        # This descriptor has the same name as the imported validator and
        # therefore shadows it; import the validator under an alias so the
        # check does not call the descriptor class itself.
        from postcode_validator_uk.validators import UKPostcode as _PostcodeValidator

        # NOTE(review): confirm that constructing the validator is enough to
        # reject an invalid postcode.
        if not _PostcodeValidator(value):
            raise ValueError("Invalid UK postcode.")
        obj.__dict__[self.name] = value
class UKHomeCountry:
    """Descriptor accepting only the four UK home countries.

    The value is kept in the owning instance's ``__dict__`` so every instance
    has its own value, instead of all instances sharing one value stored on
    the descriptor.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        # Called at class creation with the attribute name the descriptor
        # is bound to.
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self  # accessed on the class itself
        return obj.__dict__.get(self.name)

    def __set__(self, obj, value):
        if value not in ["England", "Wales", "Scotland", "Northern Ireland"]:
            raise ValueError("Invalid home country")
        obj.__dict__[self.name] = value
class Address(Serialiser):
    """Postal address; ``postcode`` and ``country`` go through descriptors."""

    postcode = UKPostcode()
    country = UKHomeCountry()

    def __init__(self, street: str, postcode: str, country: str, city: str):
        # Plain attribute, stored as given.
        self.street = street
        # Descriptor-backed attribute: assignment triggers its check.
        self.postcode = postcode
        # Plain attribute, stored as given.
        self.city = city
        # Descriptor-backed attribute: assignment triggers its check.
        self.country = country
class LegalAge:
    """Descriptor accepting a ``YYYY-MM-DD`` date of birth of someone aged 18+.

    The value is kept in the owning instance's ``__dict__`` so every instance
    has its own value, instead of all instances sharing one value stored on
    the descriptor.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        # Called at class creation with the attribute name the descriptor
        # is bound to.
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self  # accessed on the class itself
        return obj.__dict__.get(self.name)

    def __set__(self, obj, value):
        born = datetime.strptime(value, "%Y-%m-%d").date()
        # A person born exactly 18 years ago is 18 today and must be accepted.
        if born > date.today() - relativedelta(years=18):
            raise ValueError("Underage.")
        obj.__dict__[self.name] = value
class Person(Serialiser):
    """A person whose ``dob`` is checked by the ``LegalAge`` descriptor.

    ``contact`` and ``address`` are stored exactly as passed and only when
    truthy; this class does not validate or convert them itself.
    """

    dob = LegalAge()

    def __init__(
        self,
        first_name: str,
        last_name: str,
        dob: str,
        contact: Optional[Contact] = None,
        address: Optional[Address] = None
    ):
        self.first_name = first_name
        self.last_name = last_name
        self.dob = dob
        # Optional parts are only attached when a truthy value was given.
        for attribute, given in (("contact", contact), ("address", address)):
            if given:
                setattr(self, attribute, given)
[Person(**d) for d in data]
Output:
[{"first_name": "Gregory", "last_name": "Brooks"},
{"first_name": "Lydia", "last_name": "Lawrence"},
{"first_name": "Timothy", "last_name": "Hilton", "contact": {"phone_number": "+44191 496 0419"}, "address": {"street": "Studio 5\nTom union", "city": "North Carolineton", "country": "Wales", "postcode": "AL2R 0BL"}},
{"first_name": "Kerry", "last_name": "Brown", "contact": {"email_address": "angelaturner@hotmail.co.uk"}, "address": {"street": "Flat 81\nSimmons courts", "city": "Marcton", "country": "England", "postcode": "IG6 9NH"}}]
Using Decorator and Descriptor
import phonenumbers
from email_validator import validate_email
from postcode_validator_uk.validators import UKPostcode
class EmailAddress:
    """Descriptor validating an optional email address on assignment.

    The value is kept in the owning instance's ``__dict__`` so every instance
    has its own value, instead of all instances sharing one value stored on
    the descriptor. ``None`` means "not provided" and is not validated.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self  # accessed on the class itself
        return obj.__dict__.get(self.name)

    def __set__(self, obj, value):
        if value is None:
            obj.__dict__.pop(self.name, None)
            return
        if not validate_email(value, check_deliverability=True).email:
            raise ValueError("Invalid Email address.")
        obj.__dict__[self.name] = value


def email_address(attr: str):
    """Class decorator installing an ``EmailAddress`` descriptor as ``attr``."""
    def decorator(cls):
        # setattr() after class creation does not trigger __set_name__, so
        # the attribute name is handed to the descriptor explicitly.
        setattr(cls, attr, EmailAddress(attr))
        return cls
    return decorator
class UKPhoneNumber:
    """Descriptor validating an optional GB phone number on assignment.

    The value is kept in the owning instance's ``__dict__`` so every instance
    has its own value, instead of all instances sharing one value stored on
    the descriptor. ``None`` means "not provided" and is not validated.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self  # accessed on the class itself
        return obj.__dict__.get(self.name)

    def __set__(self, obj, value):
        if value is None:
            obj.__dict__.pop(self.name, None)
            return
        if not phonenumbers.is_valid_number(phonenumbers.parse(value, "GB")):
            raise ValueError("Invalid UK phone number.")
        obj.__dict__[self.name] = value


def uk_phone_number(attr: str):
    """Class decorator installing a ``UKPhoneNumber`` descriptor as ``attr``."""
    def decorator(cls):
        # setattr() after class creation does not trigger __set_name__, so
        # the attribute name is handed to the descriptor explicitly.
        setattr(cls, attr, UKPhoneNumber(attr))
        return cls
    return decorator
@email_address("email_address")
@uk_phone_number("phone_number")
class Contact(Serialiser):
    """Contact details whose fields are validated by decorator-installed descriptors."""

    # Both values default to None, so they are annotated as Optional
    # explicitly rather than relying on an implicit Optional.
    def __init__(self, phone_number: Optional[str] = None, email_address: Optional[str] = None):
        self.phone_number = phone_number
        self.email_address = email_address
class UKPostcode:
    """Descriptor checking a UK postcode on assignment.

    The value is kept in the owning instance's ``__dict__`` so every instance
    has its own value, instead of all instances sharing one value stored on
    the descriptor.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self  # accessed on the class itself
        return obj.__dict__.get(self.name)

    def __set__(self, obj, value):
        # This descriptor has the same name as the imported validator and
        # therefore shadows it; import the validator under an alias so the
        # check does not call the descriptor class itself.
        from postcode_validator_uk.validators import UKPostcode as _PostcodeValidator

        # NOTE(review): confirm that constructing the validator is enough to
        # reject an invalid postcode.
        if not _PostcodeValidator(value):
            raise ValueError("Invalid UK postcode.")
        obj.__dict__[self.name] = value


def uk_postcode(attr: str):
    """Class decorator installing a ``UKPostcode`` descriptor as ``attr``."""
    def decorator(cls):
        # setattr() after class creation does not trigger __set_name__, so
        # the attribute name is handed to the descriptor explicitly.
        setattr(cls, attr, UKPostcode(attr))
        return cls
    return decorator
class UKHomeCountry:
    """Descriptor accepting only the four UK home countries.

    The value is kept in the owning instance's ``__dict__`` so every instance
    has its own value, instead of all instances sharing one value stored on
    the descriptor.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self  # accessed on the class itself
        return obj.__dict__.get(self.name)

    def __set__(self, obj, value):
        if value not in ["England", "Wales", "Scotland", "Northern Ireland"]:
            raise ValueError("Invalid home country")
        obj.__dict__[self.name] = value


def uk_home_country(attr: str):
    """Class decorator installing a ``UKHomeCountry`` descriptor as ``attr``."""
    def decorator(cls):
        # setattr() after class creation does not trigger __set_name__, so
        # the attribute name is handed to the descriptor explicitly.
        setattr(cls, attr, UKHomeCountry(attr))
        return cls
    return decorator
@uk_postcode("postcode")
@uk_home_country("country")
class Address(Serialiser):
    """Postal address; ``postcode`` and ``country`` go through installed descriptors."""

    def __init__(self, street: str, postcode: str, country: str, city: str):
        # Plain attribute, stored as given.
        self.street = street
        # Descriptor-backed attribute: assignment triggers its check.
        self.postcode = postcode
        # Plain attribute, stored as given.
        self.city = city
        # Descriptor-backed attribute: assignment triggers its check.
        self.country = country
class LegalAge:
    """Descriptor accepting a ``YYYY-MM-DD`` date of birth of someone aged 18+.

    The value is kept in the owning instance's ``__dict__`` so every instance
    has its own value, instead of all instances sharing one value stored on
    the descriptor.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        self.name = name

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self  # accessed on the class itself
        return obj.__dict__.get(self.name)

    def __set__(self, obj, value):
        born = datetime.strptime(value, "%Y-%m-%d").date()
        # A person born exactly 18 years ago is 18 today and must be accepted.
        if born > date.today() - relativedelta(years=18):
            raise ValueError("Underage.")
        obj.__dict__[self.name] = value


def legal_age(attr: str):
    """Class decorator installing a ``LegalAge`` descriptor as ``attr``."""
    def decorator(cls):
        # setattr() after class creation does not trigger __set_name__, so
        # the attribute name is handed to the descriptor explicitly.
        setattr(cls, attr, LegalAge(attr))
        return cls
    return decorator
@legal_age("dob")
class Person(Serialiser):
    """A person whose ``dob`` is checked by the descriptor installed by ``legal_age``.

    ``contact`` and ``address`` are stored exactly as passed and only when
    truthy; this class does not validate or convert them itself.
    """

    def __init__(
        self,
        first_name: str,
        last_name: str,
        dob: str,
        contact: Optional[Contact] = None,
        address: Optional[Address] = None
    ):
        self.first_name = first_name
        self.last_name = last_name
        self.dob = dob
        # Optional parts are only attached when a truthy value was given.
        for attribute, given in (("contact", contact), ("address", address)):
            if given:
                setattr(self, attribute, given)
[Person(**d) for d in data]
Output:
[{"first_name": "Gregory", "last_name": "Brooks"},
{"first_name": "Lydia", "last_name": "Lawrence"},
{"first_name": "Timothy", "last_name": "Hilton", "contact": {"phone_number": "+44191 496 0419"}, "address": {"street": "Studio 5\nTom union", "city": "North Carolineton", "country": "Wales", "postcode": "AL2R 0BL"}},
{"first_name": "Kerry", "last_name": "Brown", "contact": {"email_address": "angelaturner@hotmail.co.uk"}, "address": {"street": "Flat 81\nSimmons courts", "city": "Marcton", "country": "England", "postcode": "IG6 9NH"}}]
Using @dataclass and dataclass_json
from datetime import date
from typing import Optional
from dataclasses import dataclass, field
from dataclasses_json import dataclass_json, config
import phonenumbers
from email_validator import validate_email
from postcode_validator_uk.validators import UKPostcode
def exclude_optional_dict(value):
    """Return ``True`` for ``None`` and any other falsy value, ``False`` otherwise."""
    # ``None`` is itself falsy, so a single truthiness test covers both cases.
    return not value
@dataclass_json
@dataclass
class Contact:
    """Optional contact details, validated after dataclass initialisation."""

    email_address: Optional[str] = field(metadata=config(exclude=exclude_optional_dict), default=None)
    phone_number: Optional[str] = field(metadata=config(exclude=exclude_optional_dict), default=None)

    def __post_init__(self):
        """Validate the fields that were provided.

        Raises:
            ValueError: If a provided email address or phone number is invalid.
        """
        # Both fields default to None; only validate values that were given,
        # otherwise a contact with a single field could never be created.
        if self.email_address is not None:
            if not validate_email(self.email_address, check_deliverability=True).email:
                raise ValueError("Invalid Email address.")
        if self.phone_number is not None:
            if not phonenumbers.is_valid_number(phonenumbers.parse(self.phone_number, "GB")):
                raise ValueError("Invalid UK phone number.")
@dataclass_json
@dataclass
class Address:
    """Postal address checked after dataclass initialisation."""

    street: str
    postcode: str
    city: str
    country: str

    def __post_init__(self):
        """Raise ``ValueError`` for a rejected postcode or a non-UK home country."""
        # NOTE(review): confirm that constructing UKPostcode is enough to
        # reject an invalid postcode.
        postcode_ok = bool(UKPostcode(self.postcode))
        if not postcode_ok:
            raise ValueError("Invalid UK postcode.")
        home_countries = ["England", "Wales", "Scotland", "Northern Ireland"]
        if self.country not in home_countries:
            raise ValueError("Invalid home country")
@dataclass_json
@dataclass
class Person:
    """A person who must be at least 18 years old.

    ``contact`` and ``address`` are stored as passed; ``__post_init__`` only
    checks ``dob``.
    """

    first_name: str
    last_name: str
    dob: str
    contact: Optional[Contact] = field(metadata=config(exclude=exclude_optional_dict), default=None)
    address: Optional[Address] = field(metadata=config(exclude=exclude_optional_dict), default=None)

    def __post_init__(self):
        """Raise ``ValueError`` when ``dob`` (``YYYY-MM-DD``) is less than 18 years ago."""
        born = datetime.strptime(self.dob, "%Y-%m-%d").date()
        # A person born exactly 18 years ago is 18 today and must be accepted.
        if born > date.today() - relativedelta(years=18):
            raise ValueError("Underage.")
[Person(**d).to_dict() for d in data]
Output:
[{'first_name': 'Gregory', 'last_name': 'Brooks', 'dob': '1996-11-18'},
{'first_name': 'Lydia', 'last_name': 'Lawrence', 'dob': '1973-12-25'},
{'first_name': 'Timothy',
'last_name': 'Hilton',
'dob': '2002-03-10',
'contact': {'phone_number': '+44191 496 0419'},
'address': {'street': 'Studio 5\nTom union',
'city': 'North Carolineton',
'country': 'Wales',
'postcode': 'AL2R 0BL'}},
{'first_name': 'Kerry',
'last_name': 'Brown',
'dob': '1979-09-12',
'contact': {'email_address': 'angelaturner@hotmail.co.uk'},
'address': {'street': 'Flat 81\nSimmons courts',
'city': 'Marcton',
'country': 'England',
'postcode': 'IG6 9NH'}}]
Using @dataclass with custom Validation class
https://gist.github.com/rochacbruno/978405e4839142e409f8402eece505e8
from datetime import date
from typing import Optional
from dataclasses import dataclass, field
from dataclasses_json import dataclass_json, config
import phonenumbers
from email_validator import validate_email
from postcode_validator_uk.validators import UKPostcode
def exclude_optional_dict(value):
    """Return ``True`` for ``None`` and any other falsy value, ``False`` otherwise."""
    # ``None`` is itself falsy, so a single truthiness test covers both cases.
    return not value
class Validations:
    """Mixin for dataclasses that runs per-field ``validate_<name>`` hooks."""

    def __post_init__(self):
        """Apply every declared validation hook to its field.

        For each dataclass field ``<name>``, a truthy attribute called
        ``validate_<name>`` is invoked as ``hook(current_value, field=<Field>)``
        and its return value replaces the field value. A hook may therefore
        either raise (e.g. ``ValueError``) or transform the value.
        """
        for name, spec in self.__dataclass_fields__.items():
            hook = getattr(self, f"validate_{name}", None)
            if not hook:
                continue
            current = getattr(self, name)
            setattr(self, name, hook(current, field=spec))
@dataclass_json
@dataclass
class Contact(Validations):
    """Optional contact details validated through ``validate_<field>`` hooks."""

    email_address: Optional[str] = field(metadata=config(exclude=exclude_optional_dict), default=None)
    phone_number: Optional[str] = field(metadata=config(exclude=exclude_optional_dict), default=None)

    def validate_email_address(self, value, **_) -> Optional[str]:
        """Return ``value`` unchanged if absent or valid; raise ``ValueError`` otherwise."""
        # The field defaults to None: nothing to validate when it was not given.
        if value is None:
            return None
        if not validate_email(value, check_deliverability=True).email:
            raise ValueError("Invalid Email address.")
        return value

    def validate_phone_number(self, value, **_) -> Optional[str]:
        """Return ``value`` unchanged if absent or valid; raise ``ValueError`` otherwise."""
        # The field defaults to None: nothing to validate when it was not given.
        if value is None:
            return None
        if not phonenumbers.is_valid_number(phonenumbers.parse(value, "GB")):
            raise ValueError("Invalid UK phone number.")
        return value
@dataclass_json
@dataclass
class Address(Validations):
    """Postal address validated through ``validate_<field>`` hooks."""

    street: str
    postcode: str
    city: str
    country: str

    def validate_postcode(self, value, **_) -> str:
        """Return ``value`` when ``UKPostcode(value)`` is truthy, else raise ``ValueError``."""
        # NOTE(review): confirm that constructing UKPostcode is enough to
        # reject an invalid postcode.
        if UKPostcode(value):
            return value
        raise ValueError("Invalid UK postcode.")

    def validate_country(self, value, **_) -> str:
        """Return ``value`` when it names a UK home country, else raise ``ValueError``."""
        if value in ["England", "Wales", "Scotland", "Northern Ireland"]:
            return value
        raise ValueError("Invalid home country")
@dataclass_json
@dataclass
class Person(Validations):
    """A person who must be at least 18 years old.

    Inherits ``Validations`` so that ``validate_dob`` actually runs after
    initialisation; without that base class the hook is never called.
    ``contact`` and ``address`` are stored as passed.
    """

    first_name: str
    last_name: str
    dob: str
    contact: Optional[Contact] = field(metadata=config(exclude=exclude_optional_dict), default=None)
    address: Optional[Address] = field(metadata=config(exclude=exclude_optional_dict), default=None)

    def validate_dob(self, value, **_) -> str:
        """Return ``value`` if the ``YYYY-MM-DD`` date of birth is that of an adult.

        Raises:
            ValueError: If the person is younger than 18, or ``value`` is not
                formatted as ``YYYY-MM-DD``.
        """
        born = datetime.strptime(value, "%Y-%m-%d").date()
        # A person born exactly 18 years ago is 18 today and must be accepted.
        if born > date.today() - relativedelta(years=18):
            raise ValueError("Underage.")
        return value
[Person(**d).to_dict() for d in data]
Output:
[{'first_name': 'Gregory', 'last_name': 'Brooks', 'dob': '1996-11-18'},
{'first_name': 'Lydia', 'last_name': 'Lawrence', 'dob': '1973-12-25'},
{'first_name': 'Timothy',
'last_name': 'Hilton',
'dob': '2002-03-10',
'contact': {'phone_number': '+44191 496 0419'},
'address': {'street': 'Studio 5\nTom union',
'city': 'North Carolineton',
'country': 'Wales',
'postcode': 'AL2R 0BL'}},
{'first_name': 'Kerry',
'last_name': 'Brown',
'dob': '1979-09-12',
'contact': {'email_address': 'angelaturner@hotmail.co.uk'},
'address': {'street': 'Flat 81\nSimmons courts',
'city': 'Marcton',
'country': 'England',
'postcode': 'IG6 9NH'}}]
Validation using Schema
from schema import Schema, And, Use, Optional
from datetime import date
import phonenumbers
from email_validator import validate_email
from postcode_validator_uk.validators import UKPostcode
def legal_age(d):
    """Return whether the ``YYYY-MM-DD`` date of birth ``d`` is at least 18 years ago."""
    # ``>=`` so that a person turning 18 today is accepted.
    return (date.today() - relativedelta(years=18)) >= datetime.strptime(d, "%Y-%m-%d").date()


def valid_uk_phonenumber(d):
    """Return whether ``d`` parses as a valid phone number for region ``GB``."""
    return phonenumbers.is_valid_number(phonenumbers.parse(d, "GB"))


def valid_email_address(d):
    """Return the validated address for ``d`` (truthy when validation succeeds)."""
    return validate_email(d, check_deliverability=True).email


def valid_postcode(d):
    """Return the ``UKPostcode`` object built from ``d``."""
    # NOTE(review): confirm that constructing UKPostcode is enough to reject
    # an invalid postcode.
    return UKPostcode(d)
# Schema for a list of person records; `contact` and `address` are optional
# nested objects, and every `address` key is mandatory once it is present.
schema = Schema(
[
{
# mandatory, non-empty strings
"first_name": And(str, len),
"last_name": And(str, len),
# converted with str() first, then checked by legal_age
"dob": And(Use(str), legal_age),
Optional("contact"): {
Optional("phone_number"): And(Use(str), valid_uk_phonenumber),
Optional("email_address"): And(Use(str), valid_email_address),
},
Optional("address"): {
"street": And(Use(str), len),
"postcode": And(Use(str), valid_postcode),
"country": And(Use(str), lambda d: d in ["England", "Wales", "Scotland", "Northern Ireland"]),
"city": And(Use(str), len),
}
}
]
)
persons = schema.validate(data)
persons
Output:
[{'first_name': 'Gregory', 'last_name': 'Brooks', 'dob': '1996-11-18'},
{'first_name': 'Lydia',
'last_name': 'Lawrence',
'dob': '1973-12-25',
'contact': {}},
{'first_name': 'Timothy',
'last_name': 'Hilton',
'dob': '2002-03-10',
'contact': {'phone_number': '+44191 496 0419'},
'address': {'street': 'Studio 5\nTom union',
'city': 'North Carolineton',
'country': 'Wales',
'postcode': 'AL2R 0BL'}},
{'first_name': 'Kerry',
'last_name': 'Brown',
'dob': '1979-09-12',
'contact': {'email_address': 'angelaturner@hotmail.co.uk'},
'address': {'street': 'Flat 81\nSimmons courts',
'city': 'Marcton',
'country': 'England',
'postcode': 'IG6 9NH'}}]
Validation using Cerberus
from cerberus import Validator
from datetime import date
import phonenumbers
from email_validator import validate_email
from postcode_validator_uk.validators import UKPostcode
def legal_age(field, value, error):
    """Cerberus check: report an error unless ``value`` is an adult's date of birth.

    Args:
        field: Name of the field being validated.
        value: Date of birth, expected as a ``YYYY-MM-DD`` string.
        error: Callback used to report a validation error for ``field``.
    """
    try:
        born = datetime.strptime(value, "%Y-%m-%d").date()
    except (TypeError, ValueError):
        # TypeError: value is not a string; ValueError: not in YYYY-MM-DD
        # format. Either way, report it instead of crashing the validation.
        error(field, "Underage")
        return
    # A person born exactly 18 years ago is 18 today and must be accepted.
    if born > date.today() - relativedelta(years=18):
        error(field, "Underage")
def valid_uk_phonenumber(field, value, error):
    """Cerberus check: report an error unless ``value`` is a valid GB phone number.

    Args:
        field: Name of the field being validated.
        value: The phone number to check.
        error: Callback used to report a validation error for ``field``.
    """
    try:
        is_valid = phonenumbers.is_valid_number(phonenumbers.parse(value, "GB"))
    except (TypeError, phonenumbers.NumberParseException):
        # An unparseable number is a validation failure, not a crash.
        is_valid = False
    if not is_valid:
        error(field, "Invalid phone number.")
def valid_email_address(field, value, error):
    """Cerberus check: report an error unless ``value`` is an acceptable email address.

    Args:
        field: Name of the field being validated.
        value: The email address to check.
        error: Callback used to report a validation error for ``field``.
    """
    try:
        is_valid = bool(validate_email(value, check_deliverability=True).email)
    except (TypeError, ValueError):
        # email_validator signals a rejected address with EmailNotValidError,
        # a ValueError subclass; report it instead of letting it propagate.
        is_valid = False
    if not is_valid:
        error(field, "Invalid email address")
def valid_postcode(field, value, error):
    """Cerberus check: report an error when ``UKPostcode(value)`` is falsy or raises ``TypeError``."""
    # NOTE(review): confirm that constructing UKPostcode is enough to reject
    # an invalid postcode.
    try:
        accepted = True if UKPostcode(value) else False
    except TypeError:
        accepted = False
    if not accepted:
        error(field, "Invalid Postcode")
def valid_country(field, value, error):
    """Cerberus check: report an error unless ``value`` is a UK home country.

    Args:
        field: Name of the field being validated.
        value: The country name to check.
        error: Callback used to report a validation error for ``field``.
    """
    # The message names the country; the original reported "Invalid Postcode".
    try:
        if value not in ["England", "Wales", "Scotland", "Northern Ireland"]:
            error(field, "Invalid country")
    except TypeError:
        error(field, "Invalid country")
# Cerberus schema for a single person record; the custom checks are the
# module-level functions referenced through the "validator" rule.
schema = {
"first_name": {"type": "string", "required": True},
"last_name": {"type": "string", "required": True},
"dob": {"validator": legal_age, "required": True},
# optional nested object; neither of its keys is required
"contact": {
"type": "dict",
"schema": {
"phone_number": {"validator": valid_uk_phonenumber},
"email_address": {"validator": valid_email_address},
}
},
# optional nested object; every key is required once it is present
"address": {
"type": "dict",
"schema": {
"street": {"type": "string", "required": True},
"postcode": {"validator": valid_postcode, "required": True},
"country": {"validator": valid_country, "required": True},
"city": {"type": "string", "required": True},
}
}
}
v = Validator(schema)
for d in data:
if not v.validate(d):
print(v.errors)
else:
print("Valid Schema", d)
Output:
Valid Schema {'first_name': 'Gregory', 'last_name': 'Brooks', 'dob': '1996-11-18'}
Valid Schema {'first_name': 'Lydia', 'last_name': 'Lawrence', 'dob': '1973-12-25', 'contact': {}}
Valid Schema {'first_name': 'Timothy', 'last_name': 'Hilton', 'dob': '2002-03-10', 'contact': {'phone_number': '+44191 496 0419'}, 'address': {'street': 'Studio 5\nTom union', 'city': 'North Carolineton', 'country': 'Wales', 'postcode': 'AL2R 0BL'}}
Valid Schema {'first_name': 'Kerry', 'last_name': 'Brown', 'dob': '1979-09-12', 'contact': {'email_address': 'angelaturner@hotmail.co.uk'}, 'address': {'street': 'Flat 81\nSimmons courts', 'city': 'Marcton', 'country': 'England', 'postcode': 'IG6 9NH'}}
Cerberus Validation with class
from cerberus import Validator
from datetime import date
import phonenumbers
from email_validator import validate_email
from postcode_validator_uk.validators import UKPostcode
class CustomValidator(Validator):
    """Cerberus validator with custom ``type`` checks for the person schema.

    Each ``_validate_type_<name>`` method returns ``True`` when ``value`` is
    acceptable and ``False`` otherwise. Input that cannot be parsed at all is
    additionally reported through ``self._error`` instead of raising.
    """

    def _validate_type_legal_age(self, value):
        """Accept a ``YYYY-MM-DD`` date of birth that is at least 18 years ago."""
        try:
            born = datetime.strptime(value, "%Y-%m-%d").date()
        except (TypeError, ValueError):
            # Not a string, or not in the expected date format.
            self._error(self._field, "Underage")
            return False
        # ``<=`` so that a person turning 18 today is accepted.
        return born <= date.today() - relativedelta(years=18)

    def _validate_type_uk_phonenumber(self, value):
        """Accept a valid phone number for region ``GB``."""
        try:
            return bool(phonenumbers.is_valid_number(phonenumbers.parse(value, "GB")))
        except (TypeError, phonenumbers.NumberParseException):
            self._error(self._field, "Invalid phone number.")
            return False

    def _validate_type_email_address(self, value):
        """Accept an email address that ``validate_email`` accepts."""
        try:
            return bool(validate_email(value, check_deliverability=True).email)
        except (TypeError, ValueError):
            # EmailNotValidError is a ValueError subclass.
            self._error(self._field, "Invalid email address")
            return False

    def _validate_type_postcode(self, value):
        """Accept a value for which ``UKPostcode(value)`` is truthy."""
        # NOTE(review): confirm that constructing UKPostcode is enough to
        # reject an invalid postcode.
        try:
            return bool(UKPostcode(value))
        except TypeError:
            self._error(self._field, "Invalid Postcode")
            return False

    def _validate_type_country(self, value):
        """Accept one of the four UK home countries."""
        try:
            return value in ["England", "Wales", "Scotland", "Northern Ireland"]
        except TypeError:
            # The message names the country; the original said "Invalid Postcode".
            self._error(self._field, "Invalid country")
            return False
# Cerberus schema for a single person record; the non-builtin "type" names
# map to the _validate_type_<name> methods of CustomValidator.
schema = {
"first_name": {"type": "string", "required": True},
"last_name": {"type": "string", "required": True},
"dob": {"type": "legal_age", "required": True},
# optional nested object; neither of its keys is required
"contact": {
"type": "dict",
"schema": {
"phone_number": {"type": "uk_phonenumber"},
"email_address": {"type": "email_address"},
}
},
# optional nested object; every key is required once it is present
"address": {
"type": "dict",
"schema": {
"street": {"type": "string", "required": True},
"postcode": {"type": "postcode", "required": True},
"country": {"type": "country", "required": True},
"city": {"type": "string", "required": True},
}
}
}
v = CustomValidator(schema)
for d in data:
if not v.validate(d):
print(v.errors)
else:
print("Valid Schema", d)
Output:
Valid Schema {'first_name': 'Gregory', 'last_name': 'Brooks', 'dob': '1996-11-18'}
Valid Schema {'first_name': 'Lydia', 'last_name': 'Lawrence', 'dob': '1973-12-25', 'contact': {}}
Valid Schema {'first_name': 'Timothy', 'last_name': 'Hilton', 'dob': '2002-03-10', 'contact': {'phone_number': '+44191 496 0419'}, 'address': {'street': 'Studio 5\nTom union', 'city': 'North Carolineton', 'country': 'Wales', 'postcode': 'AL2R 0BL'}}
Valid Schema {'first_name': 'Kerry', 'last_name': 'Brown', 'dob': '1979-09-12', 'contact': {'email_address': 'angelaturner@hotmail.co.uk'}, 'address': {'street': 'Flat 81\nSimmons courts', 'city': 'Marcton', 'country': 'England', 'postcode': 'IG6 9NH'}}
Validation using Pydantic
from typing import Optional
from pydantic import BaseModel, validator, Field
class Address(BaseModel):
    """Postal address model with postcode and country validators."""

    street: str
    city: str
    country: str
    postcode: str

    @validator("postcode")
    def postcode_is_valid(cls, value):
        """Return ``value`` when ``UKPostcode(value)`` is truthy, else raise ``ValueError``."""
        # NOTE(review): confirm that constructing UKPostcode is enough to
        # reject an invalid postcode.
        if UKPostcode(value):
            return value
        raise ValueError("Must be a valid UK Postcode.")

    @validator("country")
    def country_of_the_uk(cls, value):
        """Return ``value`` when it names a UK home country, else raise ``ValueError``."""
        if value in ["England", "Wales", "Scotland", "Northern Ireland"]:
            return value
        raise ValueError("Must be a country from the UK.")
class Contact(BaseModel):
    """Contact model; both fields are optional and default to ``None``."""

    phone_number: Optional[str] = None
    email_address: Optional[str] = None

    @validator("phone_number")
    def valid_uk_phonenumber(cls, value):
        """Return ``value`` when it is a valid GB phone number, else raise ``ValueError``."""
        parsed = phonenumbers.parse(value, "GB")
        if phonenumbers.is_valid_number(parsed):
            return value
        raise ValueError("Must be a valid UK phone number.")

    @validator("email_address")
    def _validate_type_email_address(cls, value):
        """Return ``value`` when ``validate_email`` accepts it, else raise ``ValueError``."""
        checked = validate_email(value, check_deliverability=True)
        if checked.email:
            return value
        raise ValueError("Invalid email address")
class Person(BaseModel):
    """Person model; ``dob`` must be a ``YYYY-MM-DD`` date at least 18 years ago."""

    first_name: str
    last_name: str
    dob: str
    contact: Optional[Contact] = None
    address: Optional[Address] = None

    @validator("dob")
    def legal_age(cls, value):
        """Return ``value`` unless the date of birth is later than 18 years ago."""
        born = datetime.strptime(value, "%Y-%m-%d").date()
        cutoff = date.today() - relativedelta(years=18)
        if cutoff < born:
            raise ValueError("Underage")
        return value
persons = [Person(**d).dict() for d in data]
persons
Output:
[{'first_name': 'Gregory',
'last_name': 'Brooks',
'dob': '1996-11-18',
'contact': None,
'address': None},
{'first_name': 'Lydia',
'last_name': 'Lawrence',
'dob': '1973-12-25',
'contact': {'phone_number': None, 'email_address': None},
'address': None},
{'first_name': 'Timothy',
'last_name': 'Hilton',
'dob': '2002-03-10',
'contact': {'phone_number': '+44191 496 0419', 'email_address': None},
'address': {'street': 'Studio 5\nTom union',
'city': 'North Carolineton',
'country': 'Wales',
'postcode': 'AL2R 0BL'}},
{'first_name': 'Kerry',
'last_name': 'Brown',
'dob': '1979-09-12',
'contact': {'phone_number': None,
'email_address': 'angelaturner@hotmail.co.uk'},
'address': {'street': 'Flat 81\nSimmons courts',
'city': 'Marcton',
'country': 'England',
'postcode': 'IG6 9NH'}}]
Pydantic excluding None values recipe
def union(source, destination):
    """Recursively merge ``source`` into ``destination`` and return ``destination``.

    Nested dicts are merged key by key; any other value from ``source``
    overwrites the one in ``destination``. ``destination`` is modified in place.

    Args:
        source: Mapping whose entries take precedence.
        destination: Dict receiving the entries.

    Returns:
        The same ``destination`` object, after merging.
    """
    for key, value in source.items():
        if isinstance(value, dict):
            node = destination.get(key)
            if not isinstance(node, dict):
                # Missing, or holding a non-dict such as None: start from a
                # fresh dict so the recursive merge has something to fill.
                node = destination[key] = {}
            union(value, node)
        else:
            destination[key] = value
    return destination
def exclude_optional_dict(model: BaseModel):
    """Merge the model's explicitly-set fields into its ``None``-free dict.

    The result is ``model.dict(exclude_none=True)`` updated, through ``union``,
    with the entries of ``model.dict(exclude_unset=True)``.
    """
    explicitly_set = model.dict(exclude_unset=True)
    without_none = model.dict(exclude_none=True)
    return union(explicitly_set, without_none)
persons = [exclude_optional_dict(Person(**d)) for d in data]
persons
Output:
[{'first_name': 'Gregory', 'last_name': 'Brooks', 'dob': '1996-11-18'},
{'first_name': 'Lydia',
'last_name': 'Lawrence',
'dob': '1973-12-25',
'contact': {}},
{'first_name': 'Timothy',
'last_name': 'Hilton',
'dob': '2002-03-10',
'contact': {'phone_number': '+44191 496 0419'},
'address': {'street': 'Studio 5\nTom union',
'city': 'North Carolineton',
'country': 'Wales',
'postcode': 'AL2R 0BL'}},
{'first_name': 'Kerry',
'last_name': 'Brown',
'dob': '1979-09-12',
'contact': {'email_address': 'angelaturner@hotmail.co.uk'},
'address': {'street': 'Flat 81\nSimmons courts',
'city': 'Marcton',
'country': 'England',
'postcode': 'IG6 9NH'}}]
Posted on January 20, 2023
Join Our Newsletter. No Spam, Only the good stuff.
Sign up to receive the latest update from our blog.