Combining Django & Elasticsearch
Search is hard
What is relevant information
Classify information importance
Selective search attributes
What’s Elasticsearch
Based on Lucene
Distributed
Multitenant
Full-text
RESTful API
JSON serialization
Open Source:
Apache License 2.0
It’s Java
Elasticsearch & Python
Python Bindings
pyelasticsearch
elasticsearch
elasticsearch_dsl
python-requests
Use the official libraries
$ pip install elasticsearch-dsl
>>> from elasticsearch import Elasticsearch
>>> from elasticsearch_dsl import Search
>>> client = Elasticsearch()
>>> s = Search(using=client, index="blog") \
... .query("match", title="django") \
... .filter("term", is_public=True)
>>> response = s.execute()
Elasticsearch & Django
Django Bindings
Haystack
djangoes
elasticutils
django-simple-elasticsearch
Example
Index Mapping
from elasticsearch_dsl import DocType, String
class SearchArticle(DocType):
    """Search-index document describing one blog article.

    Each attribute declares a string field of the document; the inner
    ``Meta`` class names the index the document is stored in.
    """

    # Plain string fields; ``text`` is additionally analyzed with the
    # 'english' analyzer.
    title = String()
    text = String(analyzer='english')

    # Identifier-like values are declared with index='not_analyzed'.
    slug = String(index='not_analyzed')
    url = String(index='not_analyzed')
    # ...

    class Meta:
        # Name of the index this document type belongs to.
        index = 'blog'
AppConfig
from elasticsearch_dsl.connections import connections
class BlogConfig(AppConfig):
    """Application configuration for the ``blog`` app.

    On startup it opens the Elasticsearch connection, initialises the
    ``SearchArticle`` document type and registers the signal handlers that
    keep the search index in step with ``Article`` rows.
    """

    name = 'blog'

    def ready(self):
        """Set up the search connection, mapping and model signal handlers."""
        # Imports are kept local to ready(), as in the original code.
        from django.db.models.signals import post_delete, post_save

        from . import models, search, signals

        # Connection parameters come from the ELASTICSEARCH_CONNS setting and
        # are passed straight through as keyword arguments.
        connections.create_connection(**settings.ELASTICSEARCH_CONNS)
        search.SearchArticle.init()

        # Index articles when they are saved ...
        post_save.connect(signals.post_save_article, sender=models.Article)
        # ... and remove them from the index when they are deleted. Without
        # this connection the delete handler is never called and deleted
        # articles would remain searchable.
        post_delete.connect(signals.post_delete_article, sender=models.Article)
Signals
def post_save_article(sender, instance, created, **kwargs):
    """Copy a saved article into the search index.

    Signal handler with the ``post_save`` signature.

    Args:
        sender: The model class that sent the signal (unused).
        instance: The saved article; its ``pk``, ``title``, ``text`` and
            ``get_absolute_url()`` are read.
        created: True when the row was just created, in which case no lookup
            of an existing search document is attempted.
        **kwargs: Remaining signal arguments (unused).
    """
    article = None
    if not created:
        # ignore=404 makes get() return None instead of raising
        # NotFoundError, so updating an article that was never indexed
        # still works.
        article = SearchArticle.get(id=instance.pk, ignore=404)
    if article is None:
        # New article, or an existing one missing from the index: start a
        # fresh document keyed by the article's primary key.
        article = SearchArticle(meta={'id': instance.pk})

    article.title = instance.title
    article.text = instance.text
    article.url = instance.get_absolute_url()
    article.save()
def post_delete_article(sender, instance, **kwargs):
    """Remove a deleted article from the search index.

    Signal handler with the ``post_delete`` signature.

    Args:
        sender: The model class that sent the signal (unused).
        instance: The deleted article; only its ``pk`` is read.
        **kwargs: Remaining signal arguments (unused).
    """
    # ignore=404 makes get() return None for a document that is not in the
    # index, so deleting an article that was never indexed is a no-op
    # instead of an error.
    article = SearchArticle.get(id=instance.pk, ignore=404)
    if article is not None:
        article.delete()
Search View
def search(request):
    """Render the blog search page for the ``q`` query-string parameter.

    With an empty or missing ``q`` the page is rendered with an empty result
    list and no search is executed. Otherwise a ``simple_query_string`` query
    is run against the ``title`` and ``text`` fields; for unauthenticated
    users the query is additionally filtered to ``is_public=True``.

    NOTE(review): ``is_authenticated`` is invoked as a method here; newer
    Django versions expose it as a property -- confirm the targeted version.
    """
    term = request.GET.get('q', '')
    hits = []

    if term:
        lookup = SearchArticle.search().query(
            'simple_query_string',
            query=term,
            fields=['title', 'text'],
        )
        # Anonymous visitors only get to see public articles.
        if not request.user.is_authenticated():
            lookup = lookup.filter('term', is_public=True)
        hits = lookup.execute()

    return render(
        request,
        'blog/search.html',
        {'query': term, 'results': hits},
    )
Problems
Indexing happens
during request time
Solution: Celery Integration
Celery Integration
@shared_task(bind=True, default_retry_delay=60, max_retries=3)
def index_article(self, pk):
    """Index the article with primary key ``pk`` in the search index.

    Bound task: ``self`` is the task instance, used to schedule a retry.

    Args:
        pk: Primary key of the ``Article`` to index.

    Raises:
        The retry exception produced by ``self.retry()`` when the article
        cannot be found; per the decorator the task is retried up to 3 times
        with a 60 second delay.
    """
    try:
        article = Article.objects.get(pk=pk)
    except Article.DoesNotExist as exc:
        # The row is not visible (yet) -- presumably the task ran before the
        # saving transaction was committed; TODO confirm. retry() raises, the
        # explicit ``raise`` makes it obvious that execution stops here.
        raise self.retry(exc=exc)

    try:
        search_article = SearchArticle.get(id=pk)
    except elasticsearch.NotFoundError:
        # Not indexed so far: start a new document keyed by the article pk.
        search_article = SearchArticle(meta={'id': pk})

    search_article.title = article.title
    # ...
    search_article.save()
Demo
https://github.com/MarkusH/talk-django-elasticsearch
Thank you!