Stephen Gilmore

My Django Postgres FTS experience for a blog

Django September 27th, 2024 6 minute read.

This site (for now) uses PostgreSQL because it has Django ORM support for full text search. I had to cobble together a few different guides to get Postgres full text search working the way I wanted on my site. This post is about how I got it working (with plenty of help).

If you're using SQLite, or if this seems unnecessarily complex, Basic Search via LearnDjango.com may be a better starting point.

models.py

The Entry model has a few features to help with full text search:

  1. search_vector field: stores the PostgreSQL search vector for the entry.
  2. GinIndex on the search_vector field: helps searches run faster.
  3. search_components: a helper property that assigns a weight to each field (component) that makes up the search vector. In this example, I'm prioritizing the title of an entry, then its tags, and finally the content.
# models.py

from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVectorField
from django.db import models

from django.utils.translation import gettext_lazy as _

class Entry(models.Model):
    """An entry with a title, content and tags, plus a stored search vector."""

    title = models.CharField(blank=True, max_length=255)
    content = models.TextField(blank=True)
    tags = models.ManyToManyField(
        Tag,
        through="EntryTagJoin",
        blank=True,
        related_name="entries",
    )
    # Nullable column holding the PostgreSQL search vector for this entry.
    search_vector = SearchVectorField(null=True)

    class Meta:
        # GIN index over the stored search vector.
        indexes = [GinIndex(fields=["search_vector"])]

    @property
    def search_components(self):
        """Map each search weight letter to the text indexed under it.

        "A" holds the title, "B" the tag names joined by single spaces,
        and "C" the content.
        """
        tag_names = self.tags.values_list("tag", flat=True)
        return {
            "A": self.title,
            "B": " ".join(tag_names),
            "C": self.content,
        }

signals.py

Whenever a Tag or an Entry changes, we want to update the search_vector field to incorporate those updates. (This would not have been possible without Simon Willison's blog source code.)

# signals.py
import operator
from functools import reduce

from capture.models import Entry, Tag
from django.contrib.postgres.search import SearchVector
from django.db import transaction
from django.db.models import TextField, Value
from django.db.models.signals import m2m_changed, post_save
from django.dispatch import receiver


@receiver(post_save, sender=Entry)
def on_save(sender, instance: Entry, created, **kwargs):
    """
    Schedule a search_vector rebuild for the Entry that was just saved.

    The updater is built immediately and handed to transaction.on_commit().
    """
    refresh = update_search_vector(instance)
    transaction.on_commit(refresh)


@receiver(m2m_changed, sender=Entry.tags.through)
def handle_m2m_change(sender, instance, action, **kwargs):
    """
    Refresh search vectors after the tags attached to an Entry change.

    m2m_changed is sent from both sides of the relation:

    - Forward (``entry.tags.add(...)``): ``instance`` is the Entry.
    - Reverse (``tag.entries.add(...)``): ``instance`` is a Tag, and
      ``pk_set`` holds the primary keys of the affected entries.

    Only the "post_*" actions are handled, so the relation rows already
    reflect the change when the search components are read.
    """
    if action not in ("post_add", "post_remove", "post_clear"):
        return

    if not kwargs.get("reverse", False):
        transaction.on_commit(update_search_vector(instance))
        return

    # Reverse side: a Tag has no search_components, so resolve the entries
    # from pk_set instead. pk_set is None for post_clear, in which case the
    # previously related entries can no longer be identified here.
    entry_pks = kwargs.get("pk_set")
    if not entry_pks:
        return
    for entry in Entry.objects.filter(pk__in=entry_pks):
        transaction.on_commit(update_search_vector(entry))


@receiver(post_save, sender=Tag)
def handle_tag_save(sender, instance, **kwargs):
    """
    Schedule a search_vector rebuild for every Entry related to the saved Tag.
    """
    # Each related Entry gets its own updater registered with on_commit().
    related_entries = instance.entries.all()
    for related in related_entries:
        refresh = update_search_vector(related)
        transaction.on_commit(refresh)


def update_search_vector(instance: Entry):
    """
    Build a zero-argument callable that rewrites an Entry's search_vector.

    The weighted components and the primary key are read from ``instance``
    right away, when this function is called. Only the database write is
    deferred to the returned callable, which is meant to be passed to
    transaction.on_commit().
    """
    # Mapping of weight letter -> text, e.g. {"A": title, "B": tags, "C": content}
    weighted_text = instance.search_components
    entry_pk = instance.pk

    def on_commit():
        """
        Combine the captured components into one vector and store it on the row.
        """
        # One SearchVector per component:
        # - Value(..., output_field=TextField()) passes the captured text as a text expression
        # - weight is the component's letter (A is highest, D is lowest)
        # - config="english" selects the English text search configuration
        vectors = [
            SearchVector(Value(body, output_field=TextField()), weight=letter, config="english")
            for letter, body in weighted_text.items()
        ]

        # Adding the vectors together yields a single combined search vector.
        combined = reduce(operator.add, vectors)

        # A queryset update() on the captured pk writes the column directly.
        instance.__class__.objects.filter(pk=entry_pk).update(search_vector=combined)

    return on_commit

views.py

This view has a lot going on, so I'll let the comments do most of the explaining. It's a list view of entries that also includes a form to search and filter entries.

# views.py

from .forms import EntrySearchForm
from .models import Entry

from django.contrib.auth.mixins import LoginRequiredMixin
from django.contrib.postgres.search import SearchQuery, SearchRank
from django.db.models import F
from django.views.generic import ListView

class EntryListView(LoginRequiredMixin, ListView):
    """Paginated list of the current user's entries with optional search and tag filters."""

    model = Entry
    template_name = "capture/entry_list.html"
    context_object_name = "entries"
    paginate_by = 30

    def get_queryset(self):
        """Return the user's entries, narrowed by the search form when it validates."""
        # Start from the current user's entries, newest first.
        entries = (
            super()
            .get_queryset()
            .filter(user=self.request.user)
            .order_by("-entry_time")
        )

        # An invalid form leaves the base listing untouched.
        search_form = EntrySearchForm(self.request.GET, user=self.request.user)
        if not search_form.is_valid():
            return entries.prefetch_related("tags")

        # Free-text search against the stored vector, best matches first.
        terms = search_form.cleaned_data["query"]
        if terms:
            parsed = SearchQuery(terms, config="english", search_type="websearch")
            ranked = entries.annotate(rank=SearchRank(F("search_vector"), parsed))
            entries = ranked.filter(search_vector=parsed).order_by("-rank", "-entry_time")

        # Optional tag chosen in the form's tag field.
        selected_tag = search_form.cleaned_data["tag"]
        if selected_tag:
            entries = entries.filter(tags__tag=selected_tag)

        return entries.prefetch_related("tags")

    def get_context_data(self, **kwargs):
        """Add the search form and a page-less query string to the template context."""
        context = super().get_context_data(**kwargs)
        context["form"] = EntrySearchForm(self.request.GET, user=self.request.user)

        # Keep the current URL parameters on pagination links, minus "page"
        # itself so each link can supply its own page number.
        params = self.request.GET.copy()
        params.pop("page", None)
        context["query_string"] = params.urlencode()

        return context

References