Compare commits

...

10 Commits

Author SHA1 Message Date
Zev Averbach
d6b3bc7429 added two new benchmarks for one new comparator. changed some variable names and string formatters for greater clarity, added a docstring for the same purpose 2022-06-29 15:47:13 +02:00
Anthony Shaw
d1e4a387c6 Merge pull request #5 from pamelafox/patch-2
Add a benchmark for regex related practices
2022-05-28 16:41:50 +10:00
Pamela Fox
a1727824f0 Add a benchmark for regex related practices 2022-05-24 23:12:27 -07:00
Anthony Shaw
b56a7d1ce8 Merge pull request #4 from pamelafox/patch-1
Add benchmark comparing join of gen expression vs join of list comp
2022-05-25 15:06:08 +10:00
Pamela Fox
fe1b482fba Add benchmark comparing join of generator expression vs join of list comprehension 2022-05-24 21:58:00 -07:00
Anthony Shaw
2b8c41b589 Correct the output time 2022-05-17 14:26:10 +10:00
Anthony Shaw
41199a18ab Add class matching 2022-05-17 14:14:21 +10:00
Anthony Shaw
0caf235ab5 Add mapping benchmarks 2022-05-16 13:28:28 +10:00
Anthony Shaw
55e2b6df8a Positive and negative match statements 2022-05-16 12:56:09 +10:00
Anthony Shaw
6d27822648 Refactor a simpler test script 2022-05-16 12:42:15 +10:00
5 changed files with 461 additions and 166 deletions

View File

@@ -1,90 +1,155 @@
from collections import namedtuple
from dataclasses import dataclass
import typing
import sys
def attributes_in_class():
    """Benchmark a hand-written class: build an instance and stringify it.

    The class defines its own ``__init__`` and ``__repr__``; the loop creates
    100,000 instances and calls ``str()`` on each one.
    """

    class Pet:
        legs: int
        noise: str

        def __init__(self, legs, noise) -> None:
            self.legs = legs
            self.noise = noise

        def __repr__(self):
            return f"<Pet legs={self.legs} noise='{self.noise}'>"

    for _ in range(100_000):
        pet = Pet(4, "woof")
        str(pet)
def attributes_in_class_with_slots():
    """Benchmark a hand-written class that declares ``__slots__``.

    Identical to the plain-class benchmark except that the class restricts
    its attributes to ``legs`` and ``noise`` through ``__slots__``.
    """

    class Pet:
        legs: int
        noise: str
        __slots__ = ("legs", "noise")

        def __init__(self, legs, noise) -> None:
            self.legs = legs
            self.noise = noise

        def __repr__(self):
            return f"<Pet legs={self.legs} noise='{self.noise}'>"

    for _ in range(100_000):
        pet = Pet(4, "woof")
        str(pet)
def attributes_in_dataclass():
    """Benchmark a ``@dataclass``: build an instance and stringify it.

    ``__init__`` and ``__repr__`` are the ones generated by the decorator.
    """

    @dataclass
    class Pet:
        legs: int
        noise: str

    for _ in range(100_000):
        pet = Pet(4, "woof")
        str(pet)
if sys.version_info.minor >= 10:
def attributes_in_dataclass_with_slots():
@dataclass(slots=True)
class Pet:
legs: int
noise: str
for _ in range(100000):
dog = Pet(4, "woof")
str(dog)
def attributes_in_namedtuple():
    """Benchmark a ``collections.namedtuple``: build an instance and stringify it."""
    Pet = namedtuple("Pet", ["legs", "noise"])

    for _ in range(100_000):
        pet = Pet(4, "woof")
        str(pet)
def attributes_in_namedtuple_type():
    """Benchmark a ``typing.NamedTuple`` subclass: build an instance and stringify it."""

    class Pet(typing.NamedTuple):
        legs: int
        noise: str

    for _ in range(100_000):
        pet = Pet(4, "woof")
        str(pet)
def attributes_in_dict():
    """Benchmark a plain ``dict`` literal: build one and stringify it."""
    for _ in range(100_000):
        pet = {"legs": 4, "noise": "woof"}
        str(pet)
# Benchmark pairs exported by this module. Each entry is a
# (function, function, description) triple; the description reads
# "<second function's approach> instead of <first function's approach>".
__benchmarks__ = [
    (attributes_in_dataclass, attributes_in_class, "Class instead of dataclass"),
    (attributes_in_dataclass, attributes_in_namedtuple, "Namedtuple instead of dataclass"),
    (attributes_in_namedtuple, attributes_in_class, "class instead of namedtuple"),
    (attributes_in_namedtuple, attributes_in_namedtuple_type, "namedtuple class instead of namedtuple"),
    (attributes_in_class, attributes_in_dict, "dict instead of class"),
    (attributes_in_class, attributes_in_class_with_slots, "class with slots"),
]

# The slotted-dataclass benchmark is only defined on Python 3.10+. Compare the
# full version tuple instead of ``.minor`` alone so the major version counts.
if sys.version_info >= (3, 10):
    __benchmarks__.append((attributes_in_dataclass, attributes_in_dataclass_with_slots, "dataclass with slots"))
from collections import namedtuple
from dataclasses import dataclass
import typing
import sys
def attributes_in_class():
    """Time instance creation plus ``str()`` for an ordinary class.

    ``Pet`` carries an explicit ``__init__`` and ``__repr__``; 100,000
    instances are created and each is converted to a string.
    """

    class Pet:
        legs: int
        noise: str

        def __init__(self, legs, noise) -> None:
            self.legs = legs
            self.noise = noise

        def __repr__(self):
            return f"<Pet legs={self.legs} noise='{self.noise}'>"

    for _ in range(100_000):
        animal = Pet(4, "woof")
        str(animal)
def attributes_in_class_with_slots():
    """Time instance creation plus ``str()`` for a class using ``__slots__``.

    Same shape as the ordinary-class benchmark, with ``__slots__`` limiting
    the instance attributes to ``legs`` and ``noise``.
    """

    class Pet:
        legs: int
        noise: str
        __slots__ = ("legs", "noise")

        def __init__(self, legs, noise) -> None:
            self.legs = legs
            self.noise = noise

        def __repr__(self):
            return f"<Pet legs={self.legs} noise='{self.noise}'>"

    for _ in range(100_000):
        animal = Pet(4, "woof")
        str(animal)
def attributes_in_class_with_slots_factory():
    """Benchmark instances of a slotted class generated from source text.

    A small factory renders the source of a class with ``__slots__``,
    ``__init__`` and ``__repr__`` for the requested attribute names and
    ``exec``s it. The loop then creates 100,000 instances and calls ``str()``
    on each, like the other benchmarks in this module.
    """

    def class_factory(*attrs):
        """Return a new ``Klass`` class whose slots and ``__init__`` args are *attrs*.

        The attribute names are interpolated into source code that is passed
        to ``exec``, so they must be trusted identifiers (they are literals
        here) -- never feed external input to this factory.
        """
        params = ", ".join(attrs)
        # An empty attribute list still needs a syntactically valid body.
        assignments = "\n".join(
            f"        self.{attr} = {attr}" for attr in attrs
        ) or "        pass"
        # The generated __repr__ must itself be an f-string so that it reads
        # the attribute values; a plain string would return the placeholder
        # text verbatim and skip the attribute access being benchmarked.
        fields = " ".join(f"{attr}={{self.{attr}!r}}" for attr in attrs)
        class_source = (
            "class Klass:\n"
            f"    __slots__ = {attrs!r}\n"
            f"    def __init__(self, {params}):\n"
            f"{assignments}\n"
            "    def __repr__(self):\n"
            f'        return f"<{{self.__class__.__name__}} {fields}>"\n'
        )
        # Execute in a private namespace instead of leaking ``Klass`` into the
        # module globals via a ``global`` statement.
        namespace = {"__name__": __name__}
        exec(class_source, namespace)
        return namespace["Klass"]

    Pet = class_factory("legs", "noise")
    for _ in range(100_000):
        dog = Pet(4, "woof")
        str(dog)
def attributes_in_dataclass():
    """Time instance creation plus ``str()`` for a ``@dataclass``.

    The decorator supplies ``__init__`` and ``__repr__``.
    """

    @dataclass
    class Pet:
        legs: int
        noise: str

    for _ in range(100_000):
        animal = Pet(4, "woof")
        str(animal)
# Stays ``None`` on interpreters that lack ``dataclass(slots=True)`` so that
# later code can test whether the benchmark exists.
attributes_in_dataclass_with_slots = None

# ``slots=True`` requires Python 3.10+. Compare the whole version tuple rather
# than ``.minor`` alone so the major version is taken into account.
if sys.version_info >= (3, 10):

    def attributes_in_dataclass_with_slots():
        """Time instance creation plus ``str()`` for a slotted ``@dataclass``."""

        @dataclass(slots=True)
        class Pet:
            legs: int
            noise: str

        for _ in range(100_000):
            dog = Pet(4, "woof")
            str(dog)
def attributes_in_namedtuple():
    """Time instance creation plus ``str()`` for a ``collections.namedtuple``."""
    Pet = namedtuple("Pet", ["legs", "noise"])

    for _ in range(100_000):
        animal = Pet(4, "woof")
        str(animal)
def attributes_in_namedtuple_type():
    """Time instance creation plus ``str()`` for a ``typing.NamedTuple`` subclass."""

    class Pet(typing.NamedTuple):
        legs: int
        noise: str

    for _ in range(100_000):
        animal = Pet(4, "woof")
        str(animal)
def attributes_in_dict():
    """Time building a two-key ``dict`` literal plus ``str()`` on it."""
    for _ in range(100_000):
        animal = {"legs": 4, "noise": "woof"}
        str(animal)
# Benchmark pairs exported by this module. Each entry is a
# (function, function, description) triple; the description reads
# "<second function's approach> instead of <first function's approach>".
__benchmarks__ = [
    (attributes_in_dataclass, attributes_in_class, "Class instead of dataclass"),
    (attributes_in_dataclass, attributes_in_namedtuple, "Namedtuple instead of dataclass"),
    (attributes_in_namedtuple, attributes_in_class, "class instead of namedtuple"),
    (attributes_in_namedtuple, attributes_in_namedtuple_type, "namedtuple class instead of namedtuple"),
    (attributes_in_class, attributes_in_dict, "dict instead of class"),
    (attributes_in_class, attributes_in_class_with_slots, "class with slots instead of class"),
    (
        attributes_in_class_with_slots,
        attributes_in_class_with_slots_factory,
        "class with slots factory instead of class with slots",
    ),
    (
        attributes_in_dict,
        attributes_in_class_with_slots_factory,
        "class with slots factory instead of dict",
    ),
]

# The slotted-dataclass benchmark is left as ``None`` when it is not defined,
# so only register the pair when the function actually exists.
if attributes_in_dataclass_with_slots is not None:
    _slots_pair = (
        attributes_in_dataclass,
        attributes_in_dataclass_with_slots,
        "dataclass with slots instead of dataclass",
    )
    __benchmarks__.append(_slots_pair)

View File

@@ -9,7 +9,17 @@ def filter_list_as_comprehension():
inputs = range(100_000)
result = [i for i in inputs if i % 2]
def join_generator_expression():
    """Benchmark ``str.join`` fed by a generator expression.

    Title-cases six short words and joins them, 100,000 times.
    """
    words = ['data', 'type', 'is', 'so', 'long', 'now']
    for _ in range(100_000):
        ''.join(word.title() for word in words)
def join_list_comprehension():
    """Benchmark ``str.join`` fed by a list comprehension.

    Title-cases six short words into a list and joins it, 100,000 times.
    """
    words = ['data', 'type', 'is', 'so', 'long', 'now']
    for _ in range(100_000):
        ''.join([word.title() for word in words])
# Benchmark pairs exported by this module, as (function, function, description)
# triples. The list is closed once, after the last entry, so that the join
# benchmark is part of it.
__benchmarks__ = [
    (filter_list_as_loop, filter_list_as_comprehension, "Using a list comprehension to filter another list"),
    (join_generator_expression, join_list_comprehension, "Join list comprehension instead of generator expression"),
]

154
bench_match.py Normal file
View File

@@ -0,0 +1,154 @@
from typing import Mapping, Sequence
def sequence_match_logical():
    """Check with plain boolean logic that a sequence starts with a frog.

    Runs the ``isinstance`` / length / first-element test 100,000 times and
    asserts that every iteration matched.
    """
    animals = ["🐸", "🐛", "🦋", "🪲"]
    frog_count = 0
    for _ in range(100_000):
        if isinstance(animals, Sequence) and len(animals) > 0 and animals[0] == "🐸":
            frog_count += 1
    assert frog_count == 100_000
def sequence_match_statement():
""" Test matching the first element of a sequence is a frog. """
seq = ["🐸", "🐛", "🦋", "🪲"]
frogs = 0
for _ in range(100_000):
match seq:
case ["🐸", *_]: frogs += 1
assert frogs == 100_000
def literal_match_logical():
    """Count three literal values in a sequence using an ``if``/``elif`` chain.

    The six-element sequence is scanned 100,000 times; each counted value
    occurs once per scan, which the final assertions verify.
    """
    animals = ["🐊", "🐛", "🐈", "🦋", "🪲", "🐳"]
    butterflies = caterpillars = beetles = 0
    for _ in range(100_000):
        for animal in animals:
            if animal == "🦋":
                butterflies += 1
            elif animal == "🐛":
                caterpillars += 1
            elif animal == "🪲":
                beetles += 1
    assert butterflies == 100_000
    assert beetles == 100_000
    assert caterpillars == 100_000
def literal_match_statement():
""" Test matching of literal values """
seq = ["🐊", "🐛", "🐈", "🦋", "🪲", "🐳"]
butterflies = 0
caterpillars = 0
beetles = 0
for _ in range(100_000):
for x in seq:
match x:
case "🦋": butterflies += 1
case "🐛": caterpillars += 1
case "🪲": beetles += 1
assert butterflies == 100_000
assert beetles == 100_000
assert caterpillars == 100_000
def mapping_match_logical():
    """Classify mappings by their keys using ``isinstance`` and ``in`` tests.

    A boat holding the chicken together with the corn, or the chicken together
    with the fox, counts as a problem; any other mapping counts as valid. The
    four boats are classified 100,000 times and the totals are asserted.
    """
    boats = [
        {"🐓": 1},
        {"🦊": 1, "🌽": 1},
        {"🐓": 1, "🌽": 1},
        {"🐓": 1, "🦊": 1},
    ]
    problems = 0
    valid_boats = 0
    for _ in range(100_000):
        for cargo in boats:
            if isinstance(cargo, Mapping):
                if "🐓" in cargo and "🌽" in cargo:
                    problems += 1
                elif "🐓" in cargo and "🦊" in cargo:
                    problems += 1
                else:
                    valid_boats += 1
    assert valid_boats == 200_000
    assert problems == 200_000
def mapping_match_statement():
""" Test matching of mapping type"""
boats = [
{"🐓": 1, },
{"🦊": 1, "🌽": 1},
{"🐓": 1, "🌽": 1},
{"🐓": 1, "🦊": 1},
]
problems = 0
valid_boats = 0
for _ in range(100_000):
for boat in boats:
match boat:
case {"🐓": _, "🌽": _}: problems += 1
case {"🐓": _, "🦊": _}: problems += 1
case _: valid_boats += 1
assert valid_boats == 200_000
assert problems == 200_000
class Driver:
    """Simple record used by the class-matching benchmarks.

    Stores ``name`` and ``team``; any additional keyword arguments are kept
    together in the ``extra`` dict.
    """

    def __init__(self, name, team, **extra):
        self.name, self.team = name, team
        self.extra = extra
def bench_class_matching_statement():
drivers = [
Driver(name="Max Verstappen", team="Red Bull", ),
Driver(name="Sergio Perez", team="Red Bull", ),
Driver(name="Charles Leclerc", team="Ferrari", ),
Driver(name="Lewis Hamilton", team="Mercedes", ),
]
for _ in range(100_000):
for driver in drivers:
match driver:
case Driver(name="Max Verstappen"): desc = f"Max Verstappen, the current world #1"
case Driver(name=name, team="Ferrari"): desc = f"{name}, a Ferrari driver!! 🐎"
case Driver(name=name, team=team): desc = f"{name}, a {team} driver."
case _: desc = "Invalid request"
# print(desc)
def bench_class_matching_logical():
    """Describe each driver using ``isinstance`` and attribute comparisons.

    Four ``Driver`` objects are inspected 100,000 times; the resulting
    description is built and then discarded.
    """
    drivers = [
        Driver(name="Max Verstappen", team="Red Bull"),
        Driver(name="Sergio Perez", team="Red Bull"),
        Driver(name="Charles Leclerc", team="Ferrari"),
        Driver(name="Lewis Hamilton", team="Mercedes"),
    ]
    for _ in range(100_000):
        for driver in drivers:
            if not isinstance(driver, Driver):
                desc = "Invalid request"
            elif driver.name == "Max Verstappen":
                desc = "Max Verstappen, the current world #1"
            elif driver.team == "Ferrari":
                desc = f"{driver.name}, a Ferrari driver!! 🐎"
            else:
                desc = f"{driver.name}, a {driver.team} driver."
# Benchmark pairs exported by this module: each entry pairs the boolean-logic
# version with the ``match`` statement version, followed by a description.
__benchmarks__ = [
    (
        sequence_match_logical,
        sequence_match_statement,
        "Match statements (sequence)",
    ),
    (
        literal_match_logical,
        literal_match_statement,
        "Match statements (literal)",
    ),
    (
        mapping_match_logical,
        mapping_match_statement,
        "Match statements (mapping)",
    ),
    (
        bench_class_matching_logical,
        bench_class_matching_statement,
        "Match statements (classes)",
    ),
]

35
bench_regex.py Normal file
View File

@@ -0,0 +1,35 @@
import re
def regex_with_anchors():
    """Benchmark ``Pattern.match`` with a pattern anchored by ``^`` and ``$``.

    The pattern is compiled once, then applied to five sample strings
    100,000 times.
    """
    snake_case_re = re.compile(r'^([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)$')
    candidates = ['data_type', 'data_type_', '_dataType', 'dataType', 'data type']
    for _ in range(100_000):
        for candidate in candidates:
            snake_case_re.match(candidate)
def regex_with_fullmatch():
    """Benchmark ``Pattern.fullmatch`` with an un-anchored pattern.

    The pattern is compiled once, then applied to five sample strings
    100,000 times.
    """
    snake_case_re = re.compile(r'([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)')
    candidates = ['data_type', 'data_type_', '_dataType', 'dataType', 'data type']
    for _ in range(100_000):
        for candidate in candidates:
            snake_case_re.fullmatch(candidate)
def regex_with_ignorecase():
    """Benchmark ``fullmatch`` with a lowercase pattern plus ``re.IGNORECASE``.

    The pattern is compiled once, then applied to five mixed-case sample
    strings 100,000 times.
    """
    snake_case_re = re.compile(r'([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)', re.IGNORECASE)
    candidates = ['data_type', 'data_type_URL', '_DataType', 'DataTypeURL', 'Data Type URL']
    for _ in range(100_000):
        for candidate in candidates:
            snake_case_re.fullmatch(candidate)
def regex_with_capitalrange():
    """Benchmark ``fullmatch`` with explicit ``a-zA-Z`` ranges and no flags.

    The pattern is compiled once, then applied to five mixed-case sample
    strings 100,000 times.
    """
    snake_case_re = re.compile(r'([a-zA-Z]+\d*_[a-zA-Z\d_]*|_+[a-zA-Z\d]+[a-zA-Z\d_]*)')
    candidates = ['data_type', 'data_type_URL', '_DataType', 'DataTypeURL', 'Data Type URL']
    for _ in range(100_000):
        for candidate in candidates:
            snake_case_re.fullmatch(candidate)
# Benchmark pairs exported by this module, as (function, function, description)
# triples.
__benchmarks__ = [
    (
        regex_with_anchors,
        regex_with_fullmatch,
        "Using fullmatch instead of anchors",
    ),
    (
        regex_with_ignorecase,
        regex_with_capitalrange,
        "Using a-zA-Z instead of IGNORECASE",
    ),
]

181
suite.py
View File

@@ -1,75 +1,106 @@
"""
A benchmark suite for Performance Anti-Patterns
"""
import timeit
import pathlib
import sys
from statistics import fmean
from rich.console import Console
from rich.table import Table
from rich.text import Text
# Passed to ``timeit.repeat`` as ``repeat``: how many timing samples are
# collected for each benchmark function.
REPEAT = 5
# Passed to ``timeit.repeat`` as ``number``: how many times the function is
# called within each timing sample.
TIMES = 5
if __name__ == "__main__":
    # Script entry point: import every ``bench_*.py`` module next to this file,
    # time each pair listed in its ``__benchmarks__`` and print one table row
    # per pair. An optional first command-line argument restricts the run to
    # ``bench_<argument>.py``.
    table = Table(title=f"Anti-Pattern Benchmark Suite, repeat={REPEAT}, number={TIMES}")

    table.add_column("Pattern", justify="right", style="cyan", no_wrap=True)
    table.add_column("Benchmark", justify="right", style="cyan", no_wrap=True)
    table.add_column("Repeat", style="magenta")
    table.add_column("Min", style="magenta", width=7)
    table.add_column("Max", style="magenta", width=7)
    table.add_column("Mean", style="magenta", width=7)
    table.add_column("Min (+)", style="blue", width=15)
    table.add_column("Max (+)", style="blue", width=15)
    table.add_column("Mean (+)", style="blue", width=15)

    profiles_out = pathlib.Path(__file__).parent / 'profiles'
    # ``exist_ok`` avoids the check-then-create race of exists() + mkdir().
    profiles_out.mkdir(exist_ok=True)

    n = 0
    # Sort the modules so row order and numbering are the same on every run;
    # ``glob`` itself yields files in arbitrary order.
    for f in sorted(pathlib.Path(__file__).parent.glob("bench_*.py")):
        if len(sys.argv) > 1 and f.stem != f"bench_{sys.argv[1]}":
            continue
        i = __import__(f.stem, globals(), locals(), )
        if not hasattr(i, "__benchmarks__"):
            continue
        for func1, func2, desc in i.__benchmarks__:
            n += 1
            without_result = timeit.repeat(func1, repeat=REPEAT, number=TIMES)
            with_result = timeit.repeat(func2, repeat=REPEAT, number=TIMES)

            # Compute each statistic once instead of on every use.
            without_stats = (min(without_result), max(without_result), fmean(without_result))
            with_stats = (min(with_result), max(with_result), fmean(with_result))

            # One formatted cell per statistic (min, max, mean): green when
            # the second function was faster than the first, red with a
            # leading minus otherwise. The percentage is relative to the
            # first function's timing.
            fdeltas = []
            for with_value, without_value in zip(with_stats, without_stats):
                delta = (abs(with_value - without_value) / without_value) * 100.0
                if with_value < without_value:
                    fdeltas.append(Text(f"{with_value:.3f} ({delta:.1f}%)", style="green"))
                else:
                    fdeltas.append(Text(f"{with_value:.3f} (-{delta:.1f}%)", style="red"))
            fdelta_min, fdelta_max, fdelta_mean = fdeltas

            table.add_row(
                str(n),
                desc,
                str(TIMES * REPEAT),
                "{:.3f}".format(without_stats[0]),
                "{:.3f}".format(without_stats[1]),
                "{:.3f}".format(without_stats[2]),
                fdelta_min,
                fdelta_max,
                fdelta_mean,
            )

    console = Console(width=150)
    console.print(table)
"""
A benchmark suite for Performance Anti-Patterns
"""
import timeit
import pathlib
import sys
from statistics import fmean
from rich.console import Console
from rich.table import Table
from rich.text import Text
# Passed to ``timeit.repeat`` as ``repeat``: how many timing samples are
# collected for each benchmark function.
REPEAT = 5
# Passed to ``timeit.repeat`` as ``number``: how many times the function is
# called within each timing sample.
TIMES = 5
def format_delta(result: float, comparator: float, delta: float) -> Text:
    """Render a timing and its percentage difference as a colored ``Text``.

    The cell is styled in a shade of green when ``result`` is lower than
    ``comparator`` (the shade depends on the size of ``delta``) and in red,
    with a minus sign in front of the percentage, otherwise. Deltas above
    100 are printed with fewer decimals on the timing.
    """
    if abs(delta) > 100:
        template = "{0:.5f} ({1}{2:.1f}%)"
    else:
        template = "{0:.7f} ({1}{2:.1f}%)"

    # Not faster than the comparator: red, percentage prefixed with "-".
    if not result < comparator:
        return Text(template.format(result, "-", delta), style="red")

    # Faster: choose the green shade from the size of the improvement.
    if delta < 10:
        shade = "medium_spring_green"
    elif delta < 20:
        shade = "spring_green1"
    elif delta < 40:
        shade = "spring_green2"
    else:
        shade = "green1"
    return Text(template.format(result, "", delta), style=shade)
if __name__ == "__main__":
    # Script entry point: import every ``bench_*.py`` module next to this file,
    # time each pair listed in its ``__benchmarks__`` and print one table row
    # per pair. An optional first command-line argument restricts the run to
    # ``bench_<argument>.py``.
    table = Table(
        title=f"Anti-Pattern Benchmark Suite, repeat={REPEAT}, number={TIMES}"
    )

    table.add_column("Benchmark", justify="right", style="cyan", no_wrap=True)
    table.add_column("Min", width=10)
    table.add_column("Max", width=10)
    table.add_column("Mean", width=10)
    table.add_column("Min (+)", style="blue", width=21)
    table.add_column("Max (+)", style="blue", width=21)
    table.add_column("Mean (+)", style="blue", width=21)

    profiles_out = pathlib.Path(__file__).parent / "profiles"
    # ``exist_ok`` avoids the check-then-create race of exists() + mkdir().
    profiles_out.mkdir(exist_ok=True)

    # Sort the modules so the row order is the same on every run; ``glob``
    # itself yields files in arbitrary order.
    for f in sorted(pathlib.Path(__file__).parent.glob("bench_*.py")):
        if len(sys.argv) > 1 and f.stem != f"bench_{sys.argv[1]}":
            continue
        i = __import__(
            f.stem,
            globals(),
            locals(),
        )
        if not hasattr(i, "__benchmarks__"):
            continue
        for func1, func2, desc in i.__benchmarks__:
            comparator_result = timeit.repeat(func1, repeat=REPEAT, number=TIMES)
            result = timeit.repeat(func2, repeat=REPEAT, number=TIMES)

            # Compute each statistic once instead of on every use.
            comparator_min = min(comparator_result)
            comparator_max = max(comparator_result)
            comparator_mean = fmean(comparator_result)
            result_min = min(result)
            result_max = max(result)
            result_mean = fmean(result)

            # Absolute difference as a percentage of the comparator's timing.
            delta_mean = (abs(result_mean - comparator_mean) / comparator_mean) * 100.0
            delta_min = (abs(result_min - comparator_min) / comparator_min) * 100.0
            delta_max = (abs(result_max - comparator_max) / comparator_max) * 100.0

            fdelta_min = format_delta(result_min, comparator_min, delta_min)
            fdelta_max = format_delta(result_max, comparator_max, delta_max)
            fdelta_mean = format_delta(result_mean, comparator_mean, delta_mean)

            table.add_row(
                desc,
                "{:.7f}".format(comparator_min),
                "{:.7f}".format(comparator_max),
                "{:.7f}".format(comparator_mean),
                fdelta_min,
                fdelta_max,
                fdelta_mean,
            )

    console = Console(width=150)
    console.print(table)