My Django Postgres FTS experience for a blog
This site (for now) uses PostgreSQL because it has Django ORM support for full text search. I had to cobble together a few different guides to get Postgres full text search working the way I wanted on my site. This post is about how I got it working (with plenty of help).
If you are using SQLite, or if this seems unnecessarily complex, Basic Search via LearnDjango.com may be a better starting point.
models.py
The Entry model has a few features to help with full text search:
- The `search_vector` field stores a PostgreSQL search vector.
- A `GinIndex` on the `search_vector` field helps perform faster searches.
- `search_components` is a helper property that ranks the different fields (components) that will make up the search vector. In this example, I'm prioritizing the title of an entry, then tags, and finally, the content.
# models.py
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVectorField
from django.db import models
from django.utils.translation import gettext_lazy as _
class Entry(models.Model):
    """Entry with a title, content, tags and a stored full-text search vector."""

    title = models.CharField(max_length=255, blank=True)
    content = models.TextField(blank=True)
    tags = models.ManyToManyField(
        Tag,
        through="EntryTagJoin",
        related_name="entries",
        blank=True,
    )
    # Nullable column holding the PostgreSQL search vector for this entry.
    search_vector = SearchVectorField(null=True)

    class Meta:
        # GIN index over the stored search vector.
        indexes = [GinIndex(fields=["search_vector"])]

    @property
    def search_components(self):
        """Map search weights to the text that feeds the search vector.

        Returns a dict keyed by weight letter: "A" holds the title, "B" the
        entry's tag names joined by single spaces, and "C" the content.
        """
        tag_names = self.tags.values_list("tag", flat=True)
        return {
            "A": self.title,
            "B": " ".join(tag_names),
            "C": self.content,
        }
signals.py
Whenever a Tag or our Entry model changes, we want to update the `search_vector` field to incorporate those updates. (This would not have been possible without Simon Willison's Blog Source Code.)
# signals.py
import operator
from functools import reduce
from capture.models import Entry, Tag
from django.contrib.postgres.search import SearchVector
from django.db import transaction
from django.db.models import TextField, Value
from django.db.models.signals import m2m_changed, post_save
from django.dispatch import receiver
@receiver(post_save, sender=Entry)
def on_save(sender, instance: Entry, created, **kwargs):
    """Refresh the saved Entry's search_vector once the transaction commits."""
    # update_search_vector() reads the entry's components immediately and
    # returns the callback that performs the actual UPDATE after commit.
    refresh = update_search_vector(instance)
    transaction.on_commit(refresh)
@receiver(m2m_changed, sender=Entry.tags.through)
def handle_m2m_change(sender, instance, action, **kwargs):
    """Refresh search vectors after an Entry <-> Tag relation changes.

    Only the "post_add", "post_remove" and "post_clear" actions are handled.

    The signal fires for both sides of the relation:

    * forward (``entry.tags.add(...)``): ``instance`` is the Entry, so that
      entry is refreshed;
    * reverse (``tag.entries.add(...)``): ``instance`` is the Tag and the
      ``pk_set`` keyword argument holds the primary keys of the affected
      entries, so those entries are refreshed instead of passing the Tag to
      update_search_vector().
    """
    if action not in ("post_add", "post_remove", "post_clear"):
        return

    if not kwargs.get("reverse", False):
        entries = [instance]
    else:
        # Django sends pk_set=None for the clear actions, so after a reverse
        # clear (tag.entries.clear()) the affected entries cannot be
        # identified here and are left untouched.
        pk_set = kwargs.get("pk_set")
        entries = Entry.objects.filter(pk__in=pk_set) if pk_set else []

    for entry in entries:
        transaction.on_commit(update_search_vector(entry))
@receiver(post_save, sender=Tag)
def handle_tag_save(sender, instance, **kwargs):
    """Refresh the search vector of every Entry related to the saved Tag."""
    # `entries` is the reverse accessor from Tag back to its Entry rows.
    tagged_entries = instance.entries.all()
    for tagged in tagged_entries:
        refresh = update_search_vector(tagged)
        transaction.on_commit(refresh)
def update_search_vector(instance: Entry):
    """Build a callback that rewrites an Entry's search_vector.

    The entry's primary key and its weighted text components are captured
    when this function is called. The returned callable only performs the
    database UPDATE, so it is suitable for transaction.on_commit().
    """
    # Dict of weight letter -> text, as returned by Entry.search_components.
    weighted_text = instance.search_components
    entry_pk = instance.pk
    model = instance.__class__

    def on_commit():
        """Write the combined, weighted search vector for the captured entry."""
        # One SearchVector per component: the text is wrapped in a Value()
        # typed as TextField, tagged with its weight letter, and parsed with
        # the "english" text search configuration.
        vectors = [
            SearchVector(
                Value(text, output_field=TextField()),
                weight=weight,
                config="english",
            )
            for weight, text in weighted_text.items()
        ]
        # Fold the per-component vectors together with "+", then update the
        # row selected by primary key.
        combined = reduce(operator.add, vectors)
        model.objects.filter(pk=entry_pk).update(search_vector=combined)

    return on_commit
views.py
This view has a lot going on, so I'll let the comments do most of the explaining. It's a list view of entries that also includes a form to search/filter Entries.
# views.py
from .forms import EntrySearchForm
from .models import Entry
from django.contrib.auth.mixins import LoginRequiredMixin
from django.contrib.postgres.search import SearchQuery, SearchRank
from django.db.models import F
from django.views.generic import ListView
class EntryListView(LoginRequiredMixin, ListView):
    """Paginated list of the logged-in user's entries with search and tag filters."""

    model = Entry
    template_name = "capture/entry_list.html"
    context_object_name = "entries"
    paginate_by = 30

    def get_queryset(self):
        """Return the user's entries, narrowed by the submitted search form.

        Without a valid form the entries are ordered newest first. A
        non-empty text query filters on the stored search vector and orders
        by rank, then by entry time; a selected tag narrows the result
        further.
        """
        entries = super().get_queryset()
        user = self.request.user

        # Restrict the listing to the current user, newest first.
        entries = entries.filter(user=user).order_by("-entry_time")

        form = EntrySearchForm(self.request.GET, user=user)
        if form.is_valid():
            cleaned = form.cleaned_data

            # Free-text search from the form's text field.
            text = cleaned["query"]
            if text:
                ts_query = SearchQuery(text, config="english", search_type="websearch")
                relevance = SearchRank(F("search_vector"), ts_query)
                entries = (
                    entries.annotate(rank=relevance)
                    .filter(search_vector=ts_query)
                    .order_by("-rank", "-entry_time")
                )

            # Optional tag chosen in the form's drop-down.
            selected_tag = cleaned["tag"]
            if selected_tag:
                entries = entries.filter(tags__tag=selected_tag)

        return entries.prefetch_related("tags")

    def get_context_data(self, **kwargs):
        """Add the search form and a page-less query string to the context."""
        context = super().get_context_data(**kwargs)
        request = self.request
        context["form"] = EntrySearchForm(request.GET, user=request.user)

        # Keep the current URL parameters, minus "page", so that pagination
        # links can append their own page number.
        params = request.GET.copy()
        params.pop("page", None)
        context["query_string"] = params.urlencode()
        return context
References
- Django Docs: Search. The docs.
- Simon Willison's Blog Source Code. Simon Willison's blog implements signals to handle updates to many-to-many relationships. I hadn't seen this elsewhere.
- testdriven.io: Basic and Full-text Search with Django