Let’s play: Code Review
how to write better python code first time
Dominik 'Disconnect3d' Czarnota
ThaiPy, 19.04.2018
1
# whoami
https://github.com/disconnect3d/
disconnect3d # irc.freenode.net
2
Ways to approach this talk...
3
for i in range(len(collection)):� item = collection[i]� # …
4
for i in range(len(collection)):� item = collection[i]� # …
for item in collection:� # …
5
for i in range(len(collection)):� item = collection[i]� # …
for item in collection:� # …
for idx, item in enumerate(collection):� # …
6
If you need both item and index...
2) Python batteries
7
2) Python batteries
8
2) Python batteries
9
3) Design patterns/principles
10
5) Tooling
11
6) Git commit
12
6) Git commit
13
6) Git commit
14
Lol wut
This
S u c k s
6) Git commit
15
7) Be a human
16
Okay enough
17
Let’s do some reviews?
18
Example #1
19
charref = re.compile("&#(0[0-7]+"� "|[0-9]+"� "|x[0-9a-fA-F]+);")�
—————————————————————————————————————————————————————————————————————————————————
20
What does it do?
charref = re.compile("&#(0[0-7]+"� "|[0-9]+"� "|x[0-9a-fA-F]+);")�
—————————————————————————————————————————————————————————————————————————————————
charref = re.compile(r"""� &[#] # Start of a numeric entity reference� (� 0[0-7]+ # Octal form� | [0-9]+ # Decimal form� | x[0-9a-fA-F]+ # Hexadecimal form� )� ; # Trailing semicolon�""", re.VERBOSE)
source: https://docs.python.org/3/howto/regex.html#compilation-flags �
21
What does it do?
Example #2
22
class AbstractCpu:
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
try:� getattr(self, name)(*insn.operands)�
except AttributeError:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
23
class SomeCpu(AbstractCpu):� def CMP(self, op1, op2): # ...� def MOV(self, op1, op2): # ...
class AbstractCpu:
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
try:� getattr(self, name)(*insn.operands)�
except AttributeError:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
24
class SomeCpu(AbstractCpu):� def CMP(self, op1, op2): # ...� def MOV(self, op1, op2): # ...
class AbstractCpu:
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
try:� getattr(self, name)(*insn.operands)�
except AttributeError:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
25
class SomeCpu(AbstractCpu):� def CMP(self, op1, op2): # ...� def MOV(self, op1, op2): # ...
class AbstractCpu:
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
try:� getattr(self, name)(*insn.operands)�
except AttributeError:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
26
class SomeCpu(AbstractCpu):� def CMP(self, op1, op2): # ...� def MOV(self, op1, op2): # ...
class AbstractCpu:�
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
try:� getattr(self, name)(*insn.operands)�
except AttributeError:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
27
class SomeCpu(AbstractCpu):� def CMP(self, op1, op2): # ...� def MOV(self, op1, op2): # ...
class AbstractCpu:�
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
try:� getattr(self, name)(*insn.operands)�
except AttributeError:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
28
class SomeCpu(AbstractCpu):� def CMP(self, op1, op2): # ...� def MOV(self, op1, op2): # ...
class AbstractCpu:�
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
try:� getattr(self, name)(*insn.operands)�
except AttributeError:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
29
class SomeCpu(AbstractCpu):� def CMP(self, op1, op2): # ...� def MOV(self, op1, op2): # ...
class AbstractCpu:�
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
try:� getattr(self, name)(*insn.operands)�
except AttributeError:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
30
Is this code okay?
class AbstractCpu:�
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
try:� getattr(self, name)(*insn.operands)�
except AttributeError:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
31
What if…
CMP/MOV/other instruction raises AttributeError?
class AbstractCpu:�
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
try:� getattr(self, name)(*insn.operands)�
except AttributeError:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
32
Then we will emulate it instead...
class AbstractCpu:�
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
try:� getattr(self, name)(*insn.operands)�
except AttributeError:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
33
class AbstractCpu:�
def execute(self):� """Decode, and execute one instruction pointed by register PC"""� # (...)�� try:
implementation = getattr(self, name, None)�� if implementation is not None:� implementation(*insn.operands)� else:� text_bytes = ' '.join('%02x' % x for x in insn.bytes)� logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",� insn.address, text_bytes, insn.mnemonic, insn.op_str)� self.emulate(insn)
except (Interruption, Syscall) as e:� e.on_handled = lambda: self._publish_instruction_as_executed(insn)� raise e
else:� self._publish_instruction_as_executed(insn)
34
Example #3
35
import struct��FIELDS = ['id', 'time_stamp', 'value']�STRUCT_FMT = 'QII'
�class Feature:� def __init__(self, values):� self._tuple = values�� @staticmethod� def create(**kwargs):� return Feature(tuple(kwargs.get(field) for field in FIELDS))�� @property� def time_stamp(self):� return self._tuple[1]�� def serialize(self):� return struct.pack(self.STRUCT_FMT, *self._tuple)�� @staticmethod� def deserialize(buffer):� values = struct.unpack(Feature.STRUCT_FMT, buffer)� return Feature(values)
36
import struct��FIELDS = ['id', 'time_stamp', 'value']�STRUCT_FMT = 'QII'
�class Feature:� def __init__(self, values):� self._tuple = values�� @staticmethod� def create(**kwargs):� return Feature(tuple(kwargs.get(field) for field in FIELDS))�� @property� def time_stamp(self):� return self._tuple[1]�� def serialize(self):� return struct.pack(self.STRUCT_FMT, *self._tuple)�� @staticmethod� def deserialize(buffer):� values = struct.unpack(Feature.STRUCT_FMT, buffer)� return Feature(values)
37
A property for each field
import struct��FIELDS = ['id', 'time_stamp', 'value']�STRUCT_FMT = 'QII'
�class Feature:� def __init__(self, values):� self._tuple = values�� @staticmethod� def create(**kwargs):� return Feature(tuple(kwargs.get(field) for field in FIELDS))�� @property� def time_stamp(self):� return self._tuple[1]�� def serialize(self):� return struct.pack(self.STRUCT_FMT, *self._tuple)�� @staticmethod� def deserialize(buffer):� values = struct.unpack(Feature.STRUCT_FMT, buffer)� return Feature(values)
38
import struct��FIELDS = ['id', 'time_stamp', 'value']�STRUCT_FMT = 'QII'
�class Feature:� def __init__(self, values):� self._tuple = values�� @staticmethod� def create(**kwargs):� return Feature(tuple(kwargs.get(field) for field in FIELDS))�� @property� def time_stamp(self):� return self._tuple[1]�� def serialize(self):� return struct.pack(self.STRUCT_FMT, *self._tuple)�� @staticmethod� def deserialize(buffer):� values = struct.unpack(Feature.STRUCT_FMT, buffer)� return Feature(values)
39
In [2]: struct.pack('I', 0xDEADBABE)�Out[2]: b'\xbe\xba\xad\xde'��In [3]: struct.unpack('I', b'\xbe\xba\xad\xde')�Out[3]: (3735927486,)
import struct��FIELDS = ['id', 'time_stamp', 'value']�STRUCT_FMT = 'QII'
�class Feature:� def __init__(self, values):� self._tuple = values�� @staticmethod� def create(**kwargs):� return Feature(tuple(kwargs.get(field) for field in FIELDS))�� @property� def time_stamp(self):� return self._tuple[1]�� def serialize(self):� return struct.pack(self.STRUCT_FMT, *self._tuple)�� @staticmethod� def deserialize(buffer):� values = struct.unpack(Feature.STRUCT_FMT, buffer)� return Feature(values)
40
import struct��FIELDS = ['id', 'time_stamp', 'value']�STRUCT_FMT = 'QII'
�class Feature:� def __init__(self, id=None, time_stamp=None, value=None):� self._tuple = tuple(id, time_stamp, value)�� @staticmethod� def create(**kwargs):� return Feature(tuple(kwargs.get(field) for field in FIELDS))�� @property� def time_stamp(self):� return self._tuple[1]�� def serialize(self):� return struct.pack(self.STRUCT_FMT, *self._tuple)�� @staticmethod� def deserialize(buffer):� values = struct.unpack(Feature.STRUCT_FMT, buffer)� return Feature(values)
41
import struct��FIELDS = ['id', 'time_stamp', 'value']�STRUCT_FMT = 'QII'
�class Feature:� def __init__(self, id=None, time_stamp=None, value=None):� self._tuple = tuple(id, time_stamp, value)�� @property� def time_stamp(self):� return self._tuple[1]�� def serialize(self):� return struct.pack(self.STRUCT_FMT, *self._tuple)�� @staticmethod� def deserialize(buffer):� values = struct.unpack(Feature.STRUCT_FMT, buffer)� return Feature(values)
42
import struct��FIELDS = ['id', 'time_stamp', 'value']�STRUCT_FMT = 'QII'
�class Feature:� def __init__(self, id=None, time_stamp=None, value=None):� self._tuple = tuple(id, time_stamp, value)�� @property� def time_stamp(self):� return self._tuple[1]�� def serialize(self):� return struct.pack(self.STRUCT_FMT, *self._tuple)�� @staticmethod� def deserialize(buffer):� values = struct.unpack(Feature.STRUCT_FMT, buffer)� return Feature(values)
43
Is this okay?
import ctypes���class Feature(ctypes.LittleEndianStructure):� _fields_ = (('id', ctypes.c_uint64),� ('time_stamp', ctypes.c_uint32),� ('value', ctypes.c_uint32))�� def serialize(self):� return bytes(self)� � @classmethod� def deserialize(cls, buf):� return cls.from_buffer_copy(buf)
44
import ctypes���class Feature(ctypes.LittleEndianStructure):� _fields_ = (('id', ctypes.c_uint64),� ('time_stamp', ctypes.c_uint32),� ('value', ctypes.c_uint32))�� def serialize(self):� return bytes(self)� � @classmethod� def deserialize(cls, buf):� return cls.from_buffer_copy(buf)
45
import ctypes���class Feature(ctypes.LittleEndianStructure):� """
# ...� Serialization: bytes(feature)� Deserialization: Feature.from_buffer_copy(buf)� """�� _fields_ = (('id', ctypes.c_uint64),� ('time_stamp', ctypes.c_uint32),� ('value', ctypes.c_uint32))�
46
import ctypes���class Feature(ctypes.LittleEndianStructure):� """
# ...� Serialization: bytes(feature)� Deserialization: Feature.from_buffer_copy(buf)� """�� _fields_ = (('id', ctypes.c_uint64),� ('time_stamp', ctypes.c_uint32),� ('value', ctypes.c_uint32))
Usage:
In [2]: f = Feature(id=1, time_stamp=0xFACE, value=3)��In [3]: bytes(f)�Out[3]: b'\x01\x00\x00\x00\x00\x00\x00\x00\xce\xfa\x00\x00\x03\x00\x00\x00'��In [4]: Feature.from_buffer_copy(_3)�Out[4]: <__main__.Feature at 0x7f08200e8268>
�
47
import ctypes���class Feature(ctypes.LittleEndianStructure):� """
# ...� Serialization: bytes(feature)� Deserialization: Feature.from_buffer_copy(buf)� """�� _fields_ = (('id', ctypes.c_uint64),� ('time_stamp', ctypes.c_uint32),� ('value', ctypes.c_uint32))
Usage:
In [2]: f = Feature(id=1, time_stamp=0xFACE, value=3)��In [3]: bytes(f)�Out[3]: b'\x01\x00\x00\x00\x00\x00\x00\x00\xce\xfa\x00\x00\x03\x00\x00\x00'��In [4]: Feature.from_buffer_copy(_3)�Out[4]: <__main__.Feature at 0x7f08200e8268>
�
48
Example #4
49
class SomeTable:� TABLE_NAME = 'some_table'� COL_NAMES = ('timestamp', 'some_col', ...)
def __init__(self, client):� self.client = client� self.table = self.client.open(self.TABLE_NAME)
def scan(self, ts_min, ts_max):� scanner = self.table.scanner()� scanner.add_predicates(� self.table['timestamp'] >= ts_min,� self.table['timestamp'] <= ts_max,� )� tuples = scanner.get_tuples()
mapping = lambda t: (t[0], t[1], t[2], ...)�
values = list(map(mapping, tuples))
� return pandas.df(values, columns=self.COL_NAMES)
50
class SomeTable:� TABLE_NAME = 'some_table'� COL_NAMES = ('timestamp', 'some_col', ...)
def __init__(self, client):� self.client = client� self.table = self.client.open(self.TABLE_NAME)
def scan(self, ts_min, ts_max):� scanner = self.table.scanner()� scanner.add_predicates(� self.table['timestamp'] >= ts_min,� self.table['timestamp'] <= ts_max,� )� tuples = scanner.get_tuples()
mapping = lambda t: (t[0], t[1], t[2], ...)�
values = list(map(mapping, tuples))
� return pandas.df(values, columns=self.COL_NAMES)
51
class SomeTable:� TABLE_NAME = 'some_table'� COL_NAMES = ('timestamp', 'some_col', ...)
def __init__(self, client):� self.client = client� self.table = self.client.open(self.TABLE_NAME)
def scan(self, ts_min, ts_max):� scanner = self.table.scanner()� scanner.add_predicates(� self.table['timestamp'] >= ts_min,� self.table['timestamp'] <= ts_max,� )� tuples = scanner.get_tuples()
mapping = lambda t: (t[0], t[1], t[2], ...)�
values = list(map(mapping, tuples))
� return pandas.df(values, columns=self.COL_NAMES)
52
class SomeTable:� TABLE_NAME = 'some_table'� COL_NAMES = ('timestamp', 'some_col', ...)
def __init__(self, client):� self.client = client� self.table = self.client.open(self.TABLE_NAME)
def scan(self, ts_min, ts_max):� scanner = self.table.scanner()� scanner.add_predicates(� self.table['timestamp'] >= ts_min,� self.table['timestamp'] <= ts_max,� )� tuples = scanner.get_tuples()
mapping = lambda t: (t[0], t[1], t[2], ...)�
values = list(map(mapping, tuples))
� return pandas.df(values, columns=self.COL_NAMES)
53
class SomeTable:� TABLE_NAME = 'some_table'� COL_NAMES = ('timestamp', 'some_col', ...)
def __init__(self, client):� self.client = client� self.table = self.client.open(self.TABLE_NAME)
def scan(self, ts_min, ts_max):� scanner = self.table.scanner()� scanner.add_predicates(� self.table['timestamp'] >= ts_min,� self.table['timestamp'] <= ts_max,� )� tuples = scanner.get_tuples()
mapping = lambda t: (t[0], t[1], t[2], ...)�
values = list(map(mapping, tuples))
� return pandas.df(values, columns=self.COL_NAMES)
54
So what do you think here?
class SomeTable:� TABLE_NAME = 'some_table'� COL_NAMES = ('timestamp', 'some_col', ...)
def __init__(self, client):� self.client = client� self.table = self.client.open(self.TABLE_NAME)
def scan(self, ts_min, ts_max):� scanner = self.table.scanner()� scanner.add_predicates(� self.table['timestamp'] >= ts_min,� self.table['timestamp'] <= ts_max,� )� tuples = scanner.get_tuples()
mapping = lambda t: (t[0], t[1], t[2], ...)�
values = list(map(mapping, tuples))
� return pandas.df(values, columns=self.COL_NAMES)
55
For me:
class SomeTable:� TABLE_NAME = 'some_table'� COL_NAMES = ('timestamp', 'some_col', ...)
def __init__(self, client):� self.client = client� self.table = self.client.open(self.TABLE_NAME)
def scan(self, ts_min, ts_max):� scanner = self.table.scanner()� scanner.add_predicates(� self.table['timestamp'] >= ts_min,� self.table['timestamp'] <= ts_max,� )� tuples = scanner.get_tuples()
mapping = lambda t: (t[0], t[1], t[2], ...)�
values = list(map(mapping, tuples))
� return pandas.df(values, columns=self.COL_NAMES)
56
For me:
class SomeTable:� TABLE_NAME = 'some_table'� COL_NAMES = ('timestamp', 'some_col', ...)
def __init__(self, client):� self.client = client� self.table = self.client.open(self.TABLE_NAME)
def scan(self, ts_min, ts_max):� scanner = self.table.scanner()� scanner.add_predicates(� self.table['timestamp'] >= ts_min,� self.table['timestamp'] <= ts_max,� )� tuples = scanner.get_tuples()
mapping = lambda t: (t[0], t[1], t[2], ...)�
values = list(map(mapping, tuples))
� return pandas.df(values, columns=self.COL_NAMES)
57
For me:
class SomeTable:� TABLE_NAME = 'some_table'� COL_NAMES = ('timestamp', 'some_col', ...)
def __init__(self, client):� self.client = client� self.table = self.client.open(self.TABLE_NAME)
def scan(self, ts_min, ts_max):� scanner = self.table.scanner()� scanner.add_predicates(� self.table['timestamp'] >= ts_min,� self.table['timestamp'] <= ts_max,� )� tuples = scanner.get_tuples()
mapping = lambda t: (t[0], t[1], t[2], ...)�
values = list(map(mapping, tuples))
� return pandas.df(values, columns=self.COL_NAMES)
58
For me:
class SomeTable:� TABLE_NAME = 'some_table'� COL_NAMES = ('timestamp', 'some_col', ...)
def __init__(self, client):� self.client = client� self.table = self.client.open(self.TABLE_NAME)
def scan(self, ts_min, ts_max):� scanner = self.table.scanner()� scanner.add_predicates(� self.table['timestamp'] >= ts_min,� self.table['timestamp'] <= ts_max,� )� tuples = scanner.get_tuples()
mapping = lambda t: (self._column_getter(col)(t) for col in self.COL_NAMES)�
values = list(map(mapping, tuples))
� return pandas.df(values, columns=self.COL_NAMES)
def _column_getter(col):
idx = self.COL_NAMES.index(col)
return operator.itemgetter(idx)
59
The end