1 of 36

Combining Django & Elasticsearch

2 of 36

Search is hard

3 of 36

What is relevant information?

4 of 36

Classify information importance

5 of 36

Selective search attributes

6 of 36

What’s Elasticsearch?

7 of 36

Based on Lucene

8 of 36

Distributed

Multitenant

Full-text

9 of 36

RESTful API

10 of 36

JSON serialization

11 of 36

Open Source:

Apache License 2.0

12 of 36

It’s Java

13 of 36

Elasticsearch & Python

14 of 36

Python Bindings

15 of 36

pyelasticsearch

16 of 36

elasticsearch

elasticsearch_dsl

17 of 36

python-requests

18 of 36

Use the official libraries

$ pip install elasticsearch-dsl

19 of 36

# Interactive session: create an Elasticsearch client with default settings,
# build a Search against the "blog" index (a "match" query on title plus a
# "term" filter on is_public), then execute it and keep the response.
>>> from elasticsearch import Elasticsearch

>>> from elasticsearch_dsl import Search

>>> client = Elasticsearch()

>>> s = Search(using=client, index="blog") \

... .query("match", title="django") \

... .filter("term", is_public=True)

>>> response = s.execute()

20 of 36

Elasticsearch & Django

21 of 36

Django Bindings

22 of 36

Haystack

23 of 36

djangoes

24 of 36

elasticutils

25 of 36

django-simple-elasticsearch

26 of 36

Example

27 of 36

Index Mapping

from elasticsearch_dsl import DocType, String

class SearchArticle(DocType):
    """Elasticsearch document type describing an indexed blog article.

    Documents of this type live in the ``blog`` index (see ``Meta``).
    ``title`` is declared without an explicit analyzer, ``text`` uses the
    ``english`` analyzer, and ``slug`` / ``url`` are declared with
    ``index='not_analyzed'``.
    """

    title = String()
    text = String(analyzer='english')
    # Declared not_analyzed: these hold identifiers, not free text.
    slug = String(index='not_analyzed')
    url = String(index='not_analyzed')
    # ...

    class Meta:
        # Name of the Elasticsearch index this document type is stored in.
        index = 'blog'

28 of 36

AppConfig

from elasticsearch_dsl.connections import connections

class BlogConfig(AppConfig):
    """App configuration for ``blog`` that wires up Elasticsearch at startup."""

    name = 'blog'

    def ready(self):
        """Connect to Elasticsearch, initialise the mapping and hook up signals.

        Creates the connection from ``settings.ELASTICSEARCH_CONNS``, calls
        ``SearchArticle.init()`` and registers the save and delete handlers
        for ``Article`` so the index follows the database.
        """
        # Function-local imports, as in the original; presumably because the
        # app's models cannot be imported before the app registry is ready.
        from django.db.models.signals import post_delete

        from . import models, search, signals

        connections.create_connection(**settings.ELASTICSEARCH_CONNS)
        search.SearchArticle.init()

        post_save.connect(signals.post_save_article, sender=models.Article)
        # Without this, deleted articles would remain in the search index.
        post_delete.connect(signals.post_delete_article, sender=models.Article)

29 of 36

Signals

def post_save_article(sender, instance, created, **kwargs):
    """Mirror a saved ``Article`` into the Elasticsearch index.

    Args:
        sender: The model class that sent the signal.
        instance: The saved article; its ``pk`` is used as the document id.
        created: ``True`` when the article was just created.
        **kwargs: Remaining signal arguments (unused).

    A new search document is built for freshly created articles. For
    updates the existing document is fetched; if it is missing from the
    index, a new one is created instead of letting the save fail.
    """
    from elasticsearch import NotFoundError

    if created:
        article = SearchArticle(meta={'id': instance.pk})
    else:
        try:
            article = SearchArticle.get(id=instance.pk)
        except NotFoundError:
            # The article exists in the database but was never indexed.
            article = SearchArticle(meta={'id': instance.pk})

    article.title = instance.title
    article.text = instance.text
    article.url = instance.get_absolute_url()
    article.save()

def post_delete_article(sender, instance, **kwargs):
    """Remove a deleted ``Article`` from the Elasticsearch index.

    Args:
        sender: The model class that sent the signal.
        instance: The deleted article; its ``pk`` is the document id.
        **kwargs: Remaining signal arguments (unused).

    An article that was never indexed is ignored rather than raising.
    """
    from elasticsearch import NotFoundError

    try:
        article = SearchArticle.get(id=instance.pk)
    except NotFoundError:
        # Nothing to remove: the article is not in the index.
        return
    article.delete()

30 of 36

Search View

def search(request):
    """Render the blog search page for the ``q`` query-string parameter.

    Args:
        request: The incoming HTTP request.

    Returns:
        The rendered ``blog/search.html`` response. The context holds the
        raw query under ``query`` and the hits under ``results`` (an empty
        list when no query was given).

    Unauthenticated users only get documents where ``is_public`` is true.
    """
    q = request.GET.get('q', '')
    context = {'query': q, 'results': []}
    if q:
        # Named ``article_search`` so it does not shadow this view function.
        article_search = SearchArticle.search().query(
            'simple_query_string', query=q, fields=['title', 'text'])

        # ``is_authenticated`` is a method on older Django versions and a
        # plain attribute on newer ones; support both.
        is_authenticated = request.user.is_authenticated
        if callable(is_authenticated):
            is_authenticated = is_authenticated()

        if not is_authenticated:
            article_search = article_search.filter('term', is_public=True)
        context['results'] = article_search.execute()
    return render(request, 'blog/search.html', context)

31 of 36

Problems

32 of 36

Indexing happens

during request time

33 of 36

Solution: Celery Integration

34 of 36

Celery Integration

@shared_task(bind=True, default_retry_delay=60, max_retries=3)
def index_article(self, pk):
    """Index the ``Article`` with primary key ``pk`` outside the request cycle.

    Args:
        self: The bound Celery task instance (``bind=True``).
        pk: Primary key of the article to index; also the document id.

    If the article cannot be found in the database the task is retried
    (at most 3 times, with a default delay of 60, per the decorator).
    """
    try:
        article = Article.objects.get(pk=pk)
    except Article.DoesNotExist as exc:
        # Presumably the task can run before the article is visible in the
        # database; retry later instead of failing outright.
        raise self.retry(exc=exc)

    try:
        search_article = SearchArticle.get(id=pk)
    except elasticsearch.NotFoundError:
        # Not indexed yet: start a fresh document with the article's id.
        search_article = SearchArticle(meta={'id': pk})

    search_article.title = article.title
    # ...
    search_article.save()

35 of 36

Demo

https://github.com/MarkusH/talk-django-elasticsearch

36 of 36

Thank you!