1 of 42

The Zen of Polymorphism

Brett Slatkin | 2024-10-10 | PyCon NL

2 of 42

Build a calculator program

Parse text containing formulas into objects

"1 + 2"

Calculate the result by traversing objects

3

1

2

+

1

2

+

3 of 42

Declare objects to represent formulas

class Integer:
    """Leaf node of a formula tree that holds a single literal value."""

    def __init__(self, value):
        # Stored exactly as passed in; no conversion or validation.
        self.value = value

class Add:
    """Formula node with two operands, stored as ``left`` and ``right``."""

    def __init__(self, left, right):
        # Keep both operands exactly as passed in.
        self.left, self.right = left, right

class Multiply:
    """Formula node with two operands, stored as ``left`` and ``right``."""

    def __init__(self, left, right):
        # Keep both operands exactly as passed in.
        self.left, self.right = left, right

1

+

*

4 of 42

Parse a formula into objects

formula = "(3 + 5) * (4 + 7)"

tree = parse_formula(formula)

# tree == Multiply(

# Add(Integer(3), Integer(5)),

# Add(Integer(4), Integer(7)),

# )

3

5

+

4

7

+

*

5 of 42

Three ways to calculate the result

One big function
Object-oriented programming
Dynamic dispatch

6 of 42

1. One big function

7 of 42

Implement calculate using isinstance checks

def calculate(node):
    """Evaluate a formula tree and return the result.

    An Integer node yields its stored value. Add and Multiply nodes
    evaluate their left operand, then their right operand, and combine
    the two results with ``+`` or ``*``.

    Raises:
        NotImplementedError: if ``node`` is none of the handled types.
    """
    if isinstance(node, Integer):
        result = node.value
    elif isinstance(node, Add):
        lhs = calculate(node.left)
        rhs = calculate(node.right)
        result = lhs + rhs
    elif isinstance(node, Multiply):
        lhs = calculate(node.left)
        rhs = calculate(node.right)
        result = lhs * rhs
    else:
        raise NotImplementedError
    return result

8 of 42

Exercise the calculate function

assert calculate(tree) == 88

# What it's doing

calculate(tree)

calculate(node.left)

calculate(node.left) == 3

calculate(node.right) == 5

== 3 + 5 == 8

calculate(node.right)

calculate(node.left) == 4

calculate(node.right) == 7

== 4 + 7 == 11

== 8 * 11 == 88

3

5

+

4

7

+

*

9 of 42

Add a new node type

class Power:
    """Formula node with two operands, stored as ``base`` and ``exponent``."""

    def __init__(self, base, exponent):
        # Keep both operands exactly as passed in.
        self.base, self.exponent = base, exponent

def calculate(node):

if isinstance(node, Integer): ...

elif isinstance(node, Add): ...

elif isinstance(node, Multiply): ...

elif isinstance(node, Power):

return calculate(node.base) ** calculate(node.exponent)

else: ...

10 of 42

Use a new node type

formula = "(2 ^ 3) + 4"

tree = parse_formula(formula)

# tree == Add(

# Power(

# Integer(2),

# Integer(3),

# ),

# Integer(4),

# )

assert calculate(tree) == 12

# What it's doing

calculate(tree)

calculate(node.left)

calculate(node.base) == 2

calculate(node.exponent) == 3

== 2 ** 3 == 8

calculate(node.right) == 4

== 8 + 4 == 12

11 of 42

Add a node subclass

class PositiveInteger(Integer):
    """Integer node whose constructor asserts that the value is above zero."""

    def __init__(self, value):
        # Note: assert statements are removed when Python runs with -O,
        # so this check is a development-time guard only.
        assert value > 0
        super().__init__(value)

x = PositiveInteger(3)

assert isinstance(x, Integer)

assert calculate(x) == 3

12 of 42

Add a new function

def pretty(node):
    """Render a formula tree as a string.

    An Integer node renders as its value. Add, Multiply and Power nodes
    render both children recursively, joined by "+", "*" or "^" and
    wrapped in parentheses.

    Raises:
        NotImplementedError: if ``node`` is none of the handled types.
    """
    if isinstance(node, Integer):
        return f"{node.value}"
    elif isinstance(node, Add):
        return f"({pretty(node.left)} + {pretty(node.right)})"
    elif isinstance(node, Multiply):
        return f"({pretty(node.left)} * {pretty(node.right)})"
    elif isinstance(node, Power):
        return f"({pretty(node.base)} ^ {pretty(node.exponent)})"
    else:
        raise NotImplementedError

assert pretty(tree) == "((3 + 5) * (4 + 7))"

13 of 42

Summary for "one big function"

Pros

Simple data-only objects
Obvious control flow
Easy to debug

Cons

Monolithic
Data and behavior far apart
Inevitably too long
New functions duplicate code

14 of 42

2. Object-oriented programming

15 of 42

Implement calculate using polymorphism

class Node:
    """Base class for formula nodes."""

    def calculate(self):
        """Placeholder that always raises; subclasses provide the real logic."""
        raise NotImplementedError

class Integer(Node):

...

def calculate(self):

return self.value

class Add(Node):

...

def calculate(self):

return (

self.left.calculate() +

self.right.calculate()

)

class Multiply(Node):

...

def calculate(self):

return (

self.left.calculate() *

self.right.calculate()

)

16 of 42

Exercise the calculate method

# One big function

assert calculate(tree) == 88

# What it's doing

calculate(tree)

calculate(node.left)

calculate(node.left) == 3

calculate(node.right) == 5

== 3 + 5 == 8

calculate(node.right)

calculate(node.left) == 4

calculate(node.right) == 7

== 4 + 7 == 11

== 8 * 11 == 88

# OOP

assert tree.calculate() == 88

# What it's doing

Multiply.calculate(tree)

Add.calculate(self.left)

Integer.calculate(self.left) == 3

Integer.calculate(self.right) == 5

== 3 + 5 == 8

Add.calculate(self.right)

Integer.calculate(self.left) == 4

Integer.calculate(self.right) == 7

== 4 + 7 == 11

== 8 * 11 == 88

17 of 42

Add a new node type

class Power(Node):
    """Node that raises one sub-formula to the power of another."""

    def __init__(self, base, exponent):
        # Keep both operands exactly as passed in.
        self.base, self.exponent = base, exponent

    def calculate(self):
        """Evaluate the base, then the exponent, and return base ** exponent."""
        base_value = self.base.calculate()
        exponent_value = self.exponent.calculate()
        return base_value ** exponent_value

18 of 42

Use a new node type

formula = "(2 ^ 3) + 4"

tree = parse_formula(formula)

# tree == Add(

# Power(

# Integer(2),

# Integer(3),

# ),

# Integer(4),

# )

assert tree.calculate() == 12

# What it's doing

Add.calculate(tree)

Power.calculate(self.left)

Integer.calculate(self.base) == 2

Integer.calculate(self.exponent) == 3

== 2 ** 3 == 8

Integer.calculate(self.right) == 4

== 8 + 4 == 12

Add will dispatch "calculate" calls to Power

19 of 42

Add a node subclass

class PositiveInteger(Integer):
    """Integer node whose constructor asserts that the value is above zero."""

    def __init__(self, value):
        # Note: assert statements are removed when Python runs with -O,
        # so this check is a development-time guard only.
        assert value > 0
        super().__init__(value)

tree = Add(PositiveInteger(3), Integer(-1))

assert tree.calculate() == 2

20 of 42

Add another method

class Node:
    """Base class for formula nodes; every method here is a placeholder."""

    def calculate(self):
        """Always raises; subclasses provide the real logic."""
        raise NotImplementedError

    def pretty(self):
        """Always raises; subclasses provide the real logic."""
        raise NotImplementedError

21 of 42

Implement pretty for all node classes

class Multiply(Node):

...

def pretty(self):

return (

f"({self.left.pretty()} *"

f" {self.right.pretty()})"

)

class Power(Node):

...

def pretty(self):

return (

f"({self.base.pretty()} ^"

f" {self.exponent.pretty()})"

)

class Integer(Node):

...

def pretty(self):

return f"{self.value}"

class Add(Node):

...

def pretty(self):

return (

f"({self.left.pretty()} +"

f" {self.right.pretty()})"

)

22 of 42

Use the pretty method

formula = "2 * 10 ^ (3 + 4)"

tree = parse_formula(formula)

# tree == Multiply(

# Integer(2),

# Power(

# Integer(10),

# Add(Integer(3), Integer(4)),

# ),

# )

assert tree.pretty() == "(2 * (10 ^ (3 + 4)))"

23 of 42

Imagine we need more methods

class Node:
    """Base class for formula nodes; every method here is a placeholder."""

    def calculate(self):
        """Always raises; subclasses provide the real logic."""
        raise NotImplementedError

    def pretty(self):
        """Always raises; subclasses provide the real logic."""
        raise NotImplementedError

    def solve(self):
        """Always raises; subclasses provide the real logic."""
        raise NotImplementedError

    def derivative(self):
        """Always raises; subclasses provide the real logic."""
        raise NotImplementedError

    # And 20 more...

24 of 42

What you get with OOP

One file per class

What you really want

One file per feature

solve.py

def solve_add(...):

def solve_multiply(...):

def solve_power(...):

def solve_integer(...):

...

derivative.py

def deriv_add(...):

def deriv_multiply(...):

def deriv_power(...):

def deriv_integer(...):

...

pretty.py

def pretty_add(...):

def pretty_multi(...):

def pretty_power(...):

def pretty_integer(...):

...

calculate.py

def calc_add(...):

def calc_multiply(...):

def calc_power(...):

def calc_integer(...):

...

integer.py

class Integer:

def calc(...):

def pretty(...):

def solve(...):

def derivative(...):

...

power.py

class Power:

def calc(...):

def pretty(...):

def solve(...):

def derivative(...):

...

add.py

class Add:

def calc(...):

def pretty(...):

def solve(...):

def derivative(...):

...

multiply.py

class Multiply:

def calc(...):

def pretty(...):

def solve(...):

def derivative(...):

...

But this type of module-per-class code organization can cause serious maintainability problems in production systems.
The critical issue is that all of the 25 new methods might actually be quite different from one another.
When you’re editing and debugging code, the view you need is within each of the larger, independent systems.
But with OOP these systems must be implemented across all of the classes.

That means that in practice, for this hypothetical example, the OOP approach could cause you to jump between 25 different files in order to accomplish simple programming tasks.
The code appears to be organized along the wrong axis.
You’ll almost never need to look at two independent systems for a single class at the same time, but that’s how the source files are laid out.

Sometimes people call variations of this conundrum "the expression problem"

25 of 42

What you get with OOP

Scattered dependencies

What you really want

Isolated dependencies

numerical library

integer.py

power.py

multiply.py

add.py

formatting library

symbolic math library

differentiation library

derivative.py

solve.py

pretty.py

calculate.py

26 of 42

Summary for "OOP"

Cons

Behavior spread across classes
Scattered dependencies
Dispatching is magical

Pros

Behavior next to data
Easy to add more methods
Avoids dispatching duplication

27 of 42

3. Dynamic dispatch

28 of 42

Background: Using the singledispatch decorator

from functools import singledispatch

@singledispatch
def my_print(value):
    """Print ``value``; this body is the fallback used when no registered type matches."""
    print(f"Unexpected: {type(value)}, {value!r}")


@my_print.register(int)
def _(value):
    # Implementation selected when my_print() is called with an int.
    print("Integer!", value)


@my_print.register(float)
def _(value):
    # Implementation selected when my_print() is called with a float.
    print("Float!", value)

29 of 42

Background: Calling a singledispatch function

my_print(10)

my_print(1.23)

my_print("hello")

>>>

Integer! 10

Float! 1.23

Unexpected: <class 'str'>, 'hello'

30 of 42

Implement calculate using singledispatch

@singledispatch
def calculate(node):
    """Evaluate a formula node; this fallback raises for unregistered types."""
    raise NotImplementedError


@calculate.register(Integer)
def _(node):
    # An Integer evaluates to the value it stores.
    return node.value


@calculate.register(Add)
def _(node):
    # Evaluate left first, then right, and add the results.
    lhs = calculate(node.left)
    rhs = calculate(node.right)
    return lhs + rhs


@calculate.register(Multiply)
def _(node):
    # Evaluate left first, then right, and multiply the results.
    lhs = calculate(node.left)
    rhs = calculate(node.right)
    return lhs * rhs

31 of 42

Exercise the calculate dispatching function

formula = "(2 + 3) * 4"

tree = parse_formula(formula)

# tree == Multiply(

# Add(

# Integer(2),

# Integer(3),

# ),

# Integer(4),

# )

assert calculate(tree) == 20

# What it's doing

calculate(tree)

calculate(node.left)

calculate(node.left) == 2

calculate(node.right) == 3

== 2 + 3 == 5

calculate(node.right) == 4

== 5 * 4 == 20

32 of 42

Add a new node type

class Power:
    """Formula node with two operands, stored as ``base`` and ``exponent``."""

    def __init__(self, base, exponent):
        # Keep both operands exactly as passed in.
        self.base, self.exponent = base, exponent

@calculate.register(Power)
def _(node):
    # Evaluate the base first, then the exponent, and exponentiate.
    base_value = calculate(node.base)
    exponent_value = calculate(node.exponent)
    return base_value ** exponent_value

33 of 42

Use a new node type

formula = "(2 ^ 3) + 4"

tree = parse_formula(formula)

# tree == Add(

# Power(

# Integer(2),

# Integer(3),

# ),

# Integer(4),

# )

assert calculate(tree) == 12

# What it's doing

calculate(tree)

calculate(node.left)

calculate(node.base) == 2

calculate(node.exponent) == 3

== 2 ** 3 == 8

calculate(node.right) == 4

== 8 + 4 == 12

34 of 42

Add a node subclass

class PositiveInteger(Integer):
    """Integer node whose constructor asserts that the value is above zero."""

    def __init__(self, value):
        # Note: assert statements are removed when Python runs with -O,
        # so this check is a development-time guard only.
        assert value > 0
        super().__init__(value)

tree = Add(PositiveInteger(3), Integer(-1))

assert calculate(tree) == 2

35 of 42

Add another function

# The dispatching function must exist before any @pretty.register(...)
# decorator runs, so it is defined first.
@singledispatch
def pretty(node):
    """Render a formula node as a string.

    This fallback runs for node types with no registered implementation.

    Raises:
        NotImplementedError: always, for unregistered node types.
    """
    raise NotImplementedError


@pretty.register(Integer)
def _(node):
    # An Integer renders as its stored value.
    return f"{node.value}"


@pretty.register(Add)
def _(node):
    # Render both operands recursively and wrap the sum in parentheses.
    return (
        f"({pretty(node.left)} +"
        f" {pretty(node.right)})"
    )


@pretty.register(Multiply)
def _(node):
    # Render both operands recursively and wrap the product in parentheses.
    return (
        f"({pretty(node.left)} *"
        f" {pretty(node.right)})"
    )


@pretty.register(Power)
def _(node):
    # Render base and exponent recursively, joined by "^", in parentheses.
    return (
        f"({pretty(node.base)} ^"
        f" {pretty(node.exponent)})"
    )

36 of 42

Use the pretty dispatching function

formula = "2 * 10 ^ (3 + 4)"

tree = parse_formula(formula)

# tree == Multiply(

# Integer(2),

# Power(

# Integer(10),

# Add(Integer(3), Integer(4)),

# ),

# )

assert pretty(tree) == "(2 * (10 ^ (3 + 4)))"

37 of 42

Summary for "dynamic dispatch"

Cons

Data and behavior separate
Less encapsulation
More friction adding new classes

Pros

Simple data objects
Behavior next to behavior
Isolated dependencies
Code organized on correct axis

When you add a new type to the code, you need to add a corresponding implementation for every dispatch function you want to support in all of the different files.

In contrast, with object-oriented polymorphism, new classes might seem easier to add—just implement the required methods—but adding a new method to the system requires updating every class.

With single dispatch, you can have thousands of data structures and hundreds of behaviors in the program without polluting the class definitions with methods.

Using the single dispatch approach like this organizes the code on the correct axis: All of the related behaviors are together instead of spread across countless modules where OOP classes reside. Ultimately, this makes it easier to maintain, debug, extend, refactor, and test your code.
This allows you to create independent systems of behavior in completely separate modules with no interdependencies on each other and a narrow set of external dependencies.
Simple data structures can live at the bottom of your program’s dependency tree and be shared across the whole codebase without high coupling.

38 of 42

Bonus: How does singledispatch work

from collections import defaultdict

# Maps each dispatching function to its own {type: implementation} table.
dispatch_map = defaultdict(dict)


def register_dispatch(dispatch_func, kind, func):
    """Record ``func`` as the implementation of ``dispatch_func`` for ``kind``."""
    dispatch_map[dispatch_func][kind] = func


def call_dispatch(dispatch_func, value, *args, **kwargs):
    """Call the implementation registered for the type of ``value``.

    The classes in ``type(value).__mro__`` are checked in order and the
    first one with a registered implementation wins. If none matches,
    ``dispatch_func`` itself is called as the fallback.
    """
    handlers = dispatch_map[dispatch_func]
    handler = next(
        (handlers[kind] for kind in type(value).__mro__ if kind in handlers),
        dispatch_func,
    )
    return handler(value, *args, **kwargs)

39 of 42

Bonus: How does singledispatch work

from functools import wraps

def my_dispatch(dispatch_func):
    """Decorator that turns ``dispatch_func`` into a dispatching function.

    The returned wrapper forwards every call to ``call_dispatch`` and
    carries a ``register`` attribute built by ``register_helper``.
    """
    @wraps(dispatch_func)
    def dispatcher(*args, **kwargs):
        return call_dispatch(dispatch_func, *args, **kwargs)

    dispatcher.register = register_helper(dispatch_func)
    return dispatcher

def register_helper(dispatch_func):
    """Build the ``register(kind)`` decorator factory for ``dispatch_func``."""
    def for_kind(kind):
        def attach(func):
            # Record the implementation, then hand the function back unchanged.
            register_dispatch(dispatch_func, kind, func)
            return func

        return attach

    return for_kind

40 of 42

Bonus: How does singledispatch work

@my_dispatch

def my_print(value):

print(f"Default implementation: {value}")

@my_print.register(int)

def _(value):

print(f"Integer print: {value}")

@my_print.register(float)

def _(value):

print(f"Float print: {value}")

my_print(5)

my_print(1.23)

my_print("unknown")

41 of 42

Conclusion

One big function: Fine to start, but it rapidly deteriorates
OOP: Good when classes share behaviors & larger systems are strongly interconnected
Dynamic dispatch: Good when data types are shared, but larger systems are independent
Mixing OOP and dynamic dispatch: Good when both cohesion in the small & system decoupling in the large are needed

42 of 42

Slides, code, & 35% book discount

github.com/bslatkin/pyconnl24

@haxor

onebigfluke.com