website/facedetection/services.py
import json
import logging
from django.conf import settings
from django.db.models import Count, Q
from django.utils import timezone
import boto3
from sentry_sdk import capture_exception
from members.models.member import Member
from photos.models import Photo
from utils.media.services import get_media_url
from .models import FaceDetectionPhoto, ReferenceFace
logger = logging.getLogger(__name__)
def execute_data_minimisation(dry_run=False):
"""Delete old reference faces.
This deletes reference faces that have been marked for deletion by the user for
some time, as well as reference faces of users that have not logged in for a year.
"""
delete_period_inactive_member = timezone.now() - timezone.timedelta(days=365)
delete_period_marked_for_deletion = timezone.now() - timezone.timedelta(
days=settings.FACEDETECTION_REFERENCE_FACE_STORAGE_PERIOD_AFTER_DELETE_DAYS
)
queryset = ReferenceFace.objects.filter(
Q(marked_for_deletion_at__lte=delete_period_marked_for_deletion)
| Q(user__last_login__lte=delete_period_inactive_member)
)
if not dry_run:
for reference_face in queryset:
reference_face.delete() # Don't run the queryset method, this will also delete the file
return queryset
def _serialize_lambda_source(source: ReferenceFace | FaceDetectionPhoto):
"""Serialize a source object to be sent to the lambda function."""
if isinstance(source, ReferenceFace):
return {
"type": "reference",
"pk": source.pk,
"token": source.token,
"photo_url": get_media_url(
source.file.thumbnails.large,
absolute_url=True,
# Lambda calls can be queued for up to 6 hours by default, so
# we make sure the url it uses is valid for at least that long.
expire_seconds=60 * 60 * 7,
),
}
if isinstance(source, FaceDetectionPhoto):
return {
"type": "photo",
"pk": source.pk,
"token": source.token,
"photo_url": get_media_url(
source.photo.file.thumbnails.photo_large,
absolute_url=True,
expire_seconds=60 * 60 * 7,
),
}
raise ValueError("source must be a ReferenceFace or FaceDetectionPhoto")
def _trigger_facedetection_lambda_batch(
sources: list[ReferenceFace | FaceDetectionPhoto],
):
"""Submit a batch of sources to the facedetection lambda function.
If submitting the sources fails, this is logged and
reported to Sentry, but no exception is raised.
"""
payload = {
"api_url": settings.BASE_URL,
"sources": [_serialize_lambda_source(source) for source in sources],
}
for source in sources:
source.submitted_at = timezone.now()
source.save()
try:
lambda_client = boto3.client(
service_name="lambda",
aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
)
response = lambda_client.invoke(
FunctionName=settings.FACEDETECTION_LAMBDA_ARN,
InvocationType="Event",
Payload=json.dumps(payload),
)
if response["StatusCode"] != 202:
raise Exception("Lambda response was not 202.")
except Exception as e:
logger.error(
"Submitting sources to lambda failed. Reason: %s", str(e), exc_info=True
)
capture_exception(e)
def trigger_facedetection_lambda(sources: list[ReferenceFace | FaceDetectionPhoto]):
"""Submit a sources to the facedetection lambda function for processing.
This function will check if the sources are valid and, if a lambda function has
been configured, try to submit the sources to the lambda function in batches.
If no lambda function has been configured, or submitting (a batch of) sources fails,
this is ignored. The sources can be submitted again later.
"""
if len(sources) == 0:
raise ValueError("No sources to process.")
if any(source.status != source.Status.PROCESSING for source in sources):
raise ValueError("A source has already been processed.")
if settings.FACEDETECTION_LAMBDA_ARN is None:
logger.warning(
"No Lambda ARN has been configured. Sources will not be processed."
)
return
batch_size = settings.FACEDETECTION_LAMBDA_BATCH_SIZE
for batch in [
sources[i : i + batch_size] for i in range(0, len(sources), batch_size)
]:
_trigger_facedetection_lambda_batch(batch)
def resubmit_reference_faces() -> list[ReferenceFace]:
"""Resubmit reference faces that (should) have already been submitted but aren't done.
Returns a list of reference faces that have been resubmitted.
"""
submitted_before = timezone.now() - timezone.timedelta(hours=7)
references = list(
ReferenceFace.objects.filter(
status=ReferenceFace.Status.PROCESSING,
).filter(Q(submitted_at__lte=submitted_before) | Q(submitted_at__isnull=True))
)
if references:
trigger_facedetection_lambda(references)
return references
def resubmit_photos() -> list[FaceDetectionPhoto]:
"""Resubmit photos that (should) have already been submitted but aren't done.
Returns a list of photos that have been resubmitted.
"""
submitted_before = timezone.now() - timezone.timedelta(hours=7)
photos = list(
FaceDetectionPhoto.objects.filter(
status=FaceDetectionPhoto.Status.PROCESSING,
)
.filter(Q(submitted_at__lte=submitted_before) | Q(submitted_at__isnull=True))
.select_related("photo")
)
if photos:
trigger_facedetection_lambda(photos)
return photos
def submit_new_photos() -> int:
"""Submit photos for which no FaceDetectionPhoto exists yet.
Returns the number of new photos that have been submitted.
"""
count = 0
if not Photo.objects.filter(facedetectionphoto__isnull=True).exists():
return count
# We have another level of batching (outside of trigger_facedetection_lambda)
# for performance and responsive output when there are thousands of photos.
while Photo.objects.filter(facedetectionphoto__isnull=True).exists():
photos = FaceDetectionPhoto.objects.bulk_create(
[
FaceDetectionPhoto(photo=photo)
for photo in Photo.objects.filter(facedetectionphoto__isnull=True)[:400]
]
)
trigger_facedetection_lambda(photos)
count += len(photos)
return count
def get_user_photos(member: Member):
reference_faces = member.reference_faces.filter(
marked_for_deletion_at__isnull=True,
)
# Filter out matches from long before the member's first membership.
albums_since = member.earliest_membership.since - timezone.timedelta(days=31)
photos = Photo.objects.select_related("album").filter(album__date__gte=albums_since)
# Filter out matches from after the member's last membership.
if member.latest_membership.until is not None:
photos = photos.filter(album__date__lte=member.latest_membership.until)
# Actually match the reference faces.
photos = photos.filter(album__hidden=False, album__is_processing=False).filter(
facedetectionphoto__encodings__matches__reference__in=reference_faces,
)
return (
photos.annotate(member_likes=Count("likes", filter=Q(likes__member=member)))
.select_properties("num_likes")
.order_by("-album__date", "-pk")
)