added two new benchmarks for one new comparator. changed some variable names and string formatters for greater clarity, added a docstring for the same purpose

Merge pull request #5 from pamelafox/patch-2
Add a benchmark for regex related practices
2022-06-29 15:47:13 +02:00 · 2022-05-28 16:41:50 +10:00 · 2022-05-24 23:12:27 -07:00 · 2022-05-25 15:06:08 +10:00 · 2022-05-24 21:58:00 -07:00 · 2022-05-17 14:26:10 +10:00
5 changed files with 461 additions and 166 deletions
--- a/bench_class.py
+++ b/bench_class.py
@@ -1,90 +1,155 @@
-from collections import namedtuple
-from dataclasses import dataclass
-import typing
-import sys
-
-def attributes_in_class():
-    class Pet:
-        legs: int
-        noise: str
-
-        def __init__(self, legs, noise) -> None:
-            self.legs = legs
-            self.noise = noise
-        
-        def __repr__(self):
-            return f"<Pet legs={self.legs} noise='{self.noise}'>"
-    
-    for _ in range(100000):
-        dog = Pet(4, "woof")
-        str(dog)
-
-def attributes_in_class_with_slots():
-    class Pet:
-        legs: int
-        noise: str
-        __slots__ = 'legs', 'noise'
-
-        def __init__(self, legs, noise) -> None:
-            self.legs = legs
-            self.noise = noise
-        
-        def __repr__(self):
-            return f"<Pet legs={self.legs} noise='{self.noise}'>"
-    
-    for _ in range(100000):
-        dog = Pet(4, "woof")
-        str(dog)
-
-def attributes_in_dataclass():
-    @dataclass
-    class Pet:
-        legs: int
-        noise: str
-    
-    for _ in range(100000):
-        dog = Pet(4, "woof")
-        str(dog)
-
-if sys.version_info.minor >= 10:
-    def attributes_in_dataclass_with_slots():
-        @dataclass(slots=True)
-        class Pet:
-            legs: int
-            noise: str
-        
-        for _ in range(100000):
-            dog = Pet(4, "woof")
-            str(dog)
-
-def attributes_in_namedtuple():
-    Pet = namedtuple("Pet", "legs noise")
-    for _ in range(100000):
-        dog = Pet(4, "woof")
-        str(dog)
-
-def attributes_in_namedtuple_type():
-    class Pet(typing.NamedTuple):
-        legs: int
-        noise: str
-
-    for _ in range(100000):
-        dog = Pet(4, "woof")
-        str(dog)
-
-def attributes_in_dict():
-    for _ in range(100000):
-        dog = {"legs": 4, "noise": "woof"}
-        str(dog)
-
-__benchmarks__ = [ 
-    (attributes_in_dataclass, attributes_in_class, "Class instead of dataclass"),
-    (attributes_in_dataclass, attributes_in_namedtuple, "Namedtuple instead of dataclass"),
-    (attributes_in_namedtuple, attributes_in_class, "class instead of namedtuple"),
-    (attributes_in_namedtuple, attributes_in_namedtuple_type, "namedtuple class instead of namedtuple"),
-    (attributes_in_class, attributes_in_dict, "dict instead of class"),
-    (attributes_in_class, attributes_in_class_with_slots, "class with slots")
-]
-if sys.version_info.minor >= 10:
-    __benchmarks__.append((attributes_in_dataclass, attributes_in_dataclass_with_slots, "dataclass with slots"))
-
+from collections import namedtuple
+from dataclasses import dataclass
+import typing
+import sys
+
+
+def attributes_in_class():
+    class Pet:
+        legs: int
+        noise: str
+
+        def __init__(self, legs, noise) -> None:
+            self.legs = legs
+            self.noise = noise
+
+        def __repr__(self):
+            return f"<Pet legs={self.legs} noise='{self.noise}'>"
+
+    for _ in range(100000):
+        dog = Pet(4, "woof")
+        str(dog)
+
+
+def attributes_in_class_with_slots():
+    class Pet:
+        legs: int
+        noise: str
+        __slots__ = "legs", "noise"
+
+        def __init__(self, legs, noise) -> None:
+            self.legs = legs
+            self.noise = noise
+
+        def __repr__(self):
+            return f"<Pet legs={self.legs} noise='{self.noise}'>"
+
+    for _ in range(100000):
+        dog = Pet(4, "woof")
+        str(dog)
+
+
+def attributes_in_class_with_slots_factory():
+    lb = "\n        "
+
+    def class_factory(*attrs):
+        class_string = f"""
+global Klass
+class Klass:
+    __slots__ = {attrs}
+
+    def __init__(self, {', '.join(attrs)}):
+        {lb.join(f'self.{attr}={attr}' for attr in attrs)}
+
+    def __repr__(self):
+        return (
+            f'<{{self.__class__.__name__}}' 
+            + "{' '.join(f'{attr}={{getattr(self, {attr})}}' for attr in attrs)}"
+        )
+"""
+        exec(class_string)
+        return Klass
+
+    Pet = class_factory("legs", "noise")
+
+    for _ in range(100000):
+        dog = Pet(4, "woof")
+        str(dog)
+
+
+def attributes_in_dataclass():
+    @dataclass
+    class Pet:
+        legs: int
+        noise: str
+
+    for _ in range(100000):
+        dog = Pet(4, "woof")
+        str(dog)
+
+
+attributes_in_dataclass_with_slots = None
+if sys.version_info.minor >= 10:
+
+    def attributes_in_dataclass_with_slots():
+        @dataclass(slots=True)
+        class Pet:
+            legs: int
+            noise: str
+
+        for _ in range(100000):
+            dog = Pet(4, "woof")
+            str(dog)
+
+
+def attributes_in_namedtuple():
+    Pet = namedtuple("Pet", "legs noise")
+    for _ in range(100000):
+        dog = Pet(4, "woof")
+        str(dog)
+
+
+def attributes_in_namedtuple_type():
+    class Pet(typing.NamedTuple):
+        legs: int
+        noise: str
+
+    for _ in range(100000):
+        dog = Pet(4, "woof")
+        str(dog)
+
+
+def attributes_in_dict():
+    for _ in range(100000):
+        dog = {"legs": 4, "noise": "woof"}
+        str(dog)
+
+
+__benchmarks__ = [
+    (attributes_in_dataclass, attributes_in_class, "Class instead of dataclass"),
+    (
+        attributes_in_dataclass,
+        attributes_in_namedtuple,
+        "Namedtuple instead of dataclass",
+    ),
+    (attributes_in_namedtuple, attributes_in_class, "class instead of namedtuple"),
+    (
+        attributes_in_namedtuple,
+        attributes_in_namedtuple_type,
+        "namedtuple class instead of namedtuple",
+    ),
+    (attributes_in_class, attributes_in_dict, "dict instead of class"),
+    (
+        attributes_in_class,
+        attributes_in_class_with_slots,
+        "class with slots instead of class",
+    ),
+    (
+        attributes_in_class_with_slots,
+        attributes_in_class_with_slots_factory,
+        "class with slots factory instead of class with slots",
+    ),
+    (
+        attributes_in_dict,
+        attributes_in_class_with_slots_factory,
+        "class with slots factory instead of dict",
+    ),
+]
+if attributes_in_dataclass_with_slots:
+    __benchmarks__.append(
+        (
+            attributes_in_dataclass,
+            attributes_in_dataclass_with_slots,
+            "dataclass with slots instead of dataclass",
+        )
+    )
--- a/bench_comprehensions.py
+++ b/bench_comprehensions.py
@@ -9,7 +9,17 @@ def filter_list_as_comprehension():
    inputs = range(100_000)
    result = [i for i in inputs if i % 2]

+def join_generator_expression():
+    words = ['data', 'type', 'is', 'so', 'long', 'now']
+    for x in range(100_000):
+        ''.join(ele.title() for ele in words)
+
+def join_list_comprehension():
+    words = ['data', 'type', 'is', 'so', 'long', 'now']
+    for x in range(100_000):
+        ''.join([ele.title() for ele in words])

 __benchmarks__ = [
    (filter_list_as_loop, filter_list_as_comprehension, "Using a list comprehension to filter another list"),
-]
+    (join_generator_expression, join_list_comprehension, "Join list comprehension instead of generator expression"),
+]
--- a/bench_match.py
+++ b/bench_match.py
@@ -0,0 +1,154 @@
+from typing import Mapping, Sequence
+
+
+def sequence_match_logical():
+    """ Test matching the first element of a sequence is a frog. """
+    seq = ["🐸", "🐛", "🦋", "🪲"]
+    frogs = 0
+    for _ in range(100_000):
+        if isinstance(seq, Sequence) and len(seq) > 0 and seq[0] == "🐸": 
+            frogs += 1
+    
+    assert frogs == 100_000
+
+def sequence_match_statement():
+    """ Test matching the first element of a sequence is a frog. """
+    seq = ["🐸", "🐛", "🦋", "🪲"]
+    frogs = 0
+    for _ in range(100_000):
+        match seq:
+            case ["🐸", *_]: frogs += 1
+    
+    assert frogs == 100_000
+
+def literal_match_logical():
+    """ Test matching of literal values"""
+    seq = ["🐊", "🐛", "🐈", "🦋", "🪲", "🐳"]
+    butterflies = 0
+    caterpillars = 0
+    beetles = 0
+    for _ in range(100_000):
+        for x in seq:
+            if x == "🦋": 
+                butterflies += 1
+            elif x == "🐛":
+                caterpillars += 1
+            elif x == "🪲":
+                beetles += 1
+    
+    assert butterflies == 100_000
+    assert beetles == 100_000
+    assert caterpillars == 100_000
+
+def literal_match_statement():
+    """ Test matching of literal values """
+    seq = ["🐊", "🐛", "🐈", "🦋", "🪲", "🐳"]
+    butterflies = 0
+    caterpillars = 0
+    beetles = 0
+    for _ in range(100_000):
+        for x in seq:
+            match x:
+                case "🦋": butterflies += 1
+                case "🐛": caterpillars += 1
+                case "🪲": beetles += 1
+    
+    assert butterflies == 100_000
+    assert beetles == 100_000
+    assert caterpillars == 100_000
+
+def mapping_match_logical():
+    """ Test matching of mapping type"""
+    boats = [
+        {"🐓": 1, },
+        {"🦊": 1, "🌽": 1},
+        {"🐓": 1, "🌽": 1},
+        {"🐓": 1, "🦊": 1},
+    ]
+    problems = 0
+    valid_boats = 0
+    for _ in range(100_000):
+        for boat in boats:
+            if isinstance(boat, Mapping):
+                if "🐓" in boat and "🌽" in boat: 
+                    problems += 1
+                elif "🐓" in boat and "🦊" in boat: 
+                    problems += 1
+                else:
+                    valid_boats += 1
+                    
+    
+    assert valid_boats == 200_000
+    assert problems == 200_000
+
+def mapping_match_statement():
+    """ Test matching of mapping type"""
+    boats = [
+        {"🐓": 1, },
+        {"🦊": 1, "🌽": 1},
+        {"🐓": 1, "🌽": 1},
+        {"🐓": 1, "🦊": 1},
+    ]
+    problems = 0
+    valid_boats = 0
+    for _ in range(100_000):
+        for boat in boats:
+            match boat:
+                case {"🐓": _, "🌽": _}: problems += 1
+                case {"🐓": _, "🦊": _}: problems += 1
+                case _: valid_boats += 1
+    
+    assert valid_boats == 200_000
+    assert problems == 200_000
+
+class Driver:
+    def __init__(self, name, team, **extra):
+        self.name = name
+        self.team = team
+        self.extra = extra
+
+def bench_class_matching_statement():
+    drivers = [
+        Driver(name="Max Verstappen", team="Red Bull", ),
+        Driver(name="Sergio Perez", team="Red Bull", ),
+        Driver(name="Charles Leclerc", team="Ferrari", ),
+        Driver(name="Lewis Hamilton", team="Mercedes", ),
+    ]
+
+    for _ in range(100_000):
+        for driver in drivers:
+            match driver:
+                case Driver(name="Max Verstappen"): desc = f"Max Verstappen, the current world #1"
+                case Driver(name=name, team="Ferrari"): desc = f"{name}, a Ferrari driver!! 🐎"
+                case Driver(name=name, team=team): desc = f"{name}, a {team} driver."
+                case _: desc = "Invalid request"
+            # print(desc)
+
+
+def bench_class_matching_logical():
+    drivers = [
+        Driver(name="Max Verstappen", team="Red Bull", ),
+        Driver(name="Sergio Perez", team="Red Bull", ),
+        Driver(name="Charles Leclerc", team="Ferrari", ),
+        Driver(name="Lewis Hamilton", team="Mercedes", ),
+    ]
+
+    for _ in range(100_000):
+        for driver in drivers:
+            if not isinstance(driver, Driver):
+                desc = "Invalid request"
+            elif driver.name == "Max Verstappen":
+                desc = f"Max Verstappen, the current world #1"
+            elif driver.team == "Ferrari": 
+                desc = f"{driver.name}, a Ferrari driver!! 🐎"
+            else:
+                desc = f"{driver.name}, a {driver.team} driver."
+            # print(desc)
+
+
+__benchmarks__ = [
+    (sequence_match_logical, sequence_match_statement, "Match statements (sequence)"),
+    (literal_match_logical, literal_match_statement, "Match statements (literal)"),
+    (mapping_match_logical, mapping_match_statement, "Match statements (mapping)"),
+    (bench_class_matching_logical, bench_class_matching_statement, "Match statements (classes)"),
+]
--- a/bench_regex.py
+++ b/bench_regex.py
@@ -0,0 +1,35 @@
+import re
+
+def regex_with_anchors():
+    SNAKE_CASE_RE = re.compile(r'^([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)$')
+    tests = ['data_type', 'data_type_', '_dataType', 'dataType', 'data type']
+    for x in range(100_000):
+        for test_str in tests:
+            SNAKE_CASE_RE.match(test_str)
+
+def regex_with_fullmatch():
+    SNAKE_CASE_RE = re.compile(r'([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)')
+    tests = ['data_type', 'data_type_', '_dataType', 'dataType', 'data type']
+    for x in range(100_000):
+        for test_str in tests:
+            SNAKE_CASE_RE.fullmatch(test_str)
+
+def regex_with_ignorecase():
+    SNAKE_CASE_RE = re.compile(r'([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)', re.IGNORECASE)
+    tests = ['data_type', 'data_type_URL', '_DataType', 'DataTypeURL', 'Data Type URL']
+    for x in range(100_000):
+        for test_str in tests:
+            SNAKE_CASE_RE.fullmatch(test_str)
+
+def regex_with_capitalrange():
+    SNAKE_CASE_RE = re.compile(r'([a-zA-Z]+\d*_[a-zA-Z\d_]*|_+[a-zA-Z\d]+[a-zA-Z\d_]*)')
+    tests = ['data_type', 'data_type_URL', '_DataType', 'DataTypeURL', 'Data Type URL']
+    for x in range(100_000):
+        for test_str in tests:
+            SNAKE_CASE_RE.fullmatch(test_str)
+
+
+__benchmarks__ = [
+    (regex_with_anchors, regex_with_fullmatch, "Using fullmatch instead of anchors"),
+    (regex_with_ignorecase, regex_with_capitalrange, "Using a-zA-Z instead of IGNORECASE"),
+]
--- a/suite.py
+++ b/suite.py
@@ -1,75 +1,106 @@
-"""
-A benchmark suite for Performance Anti-Patterns
-"""
-import timeit
-import pathlib
-import sys
-from statistics import fmean
-from rich.console import Console
-from rich.table import Table
-from rich.text import Text
-
-REPEAT = 5
-TIMES = 5
-
-if __name__ == "__main__":
-    table = Table(title=f"Anti-Pattern Benchmark Suite, repeat={REPEAT}, number={TIMES}")
-
-    table.add_column("Pattern", justify="right", style="cyan", no_wrap=True)
-    table.add_column("Benchmark", justify="right", style="cyan", no_wrap=True)
-    table.add_column("Repeat", style="magenta")
-    table.add_column("Min", style="magenta", width=7)
-    table.add_column("Max", style="magenta", width=7)
-    table.add_column("Mean", style="magenta", width=7)
-    table.add_column("Min (+)", style="blue", width=15)
-    table.add_column("Max (+)", style="blue", width=15)
-    table.add_column("Mean (+)", style="blue", width=15)
-
-    profiles_out = pathlib.Path(__file__).parent / 'profiles'
-    if not profiles_out.exists():
-        profiles_out.mkdir()
-    n = 0
-
-    for f in pathlib.Path(__file__).parent.glob("bench_*.py"):
-        if len(sys.argv) > 1 and f.stem != f"bench_{sys.argv[1]}":
-            continue
-        i = __import__(f.stem, globals(), locals(), )
-        if hasattr(i, "__benchmarks__"):
-            for benchmark in i.__benchmarks__:
-                n += 1
-                func1, func2, desc = benchmark
-                without_result = timeit.repeat(func1, repeat=REPEAT, number=TIMES)
-                with_result = timeit.repeat(func2, repeat=REPEAT, number=TIMES)
-
-                delta_mean = (abs(fmean(with_result) - fmean(without_result)) / fmean(without_result)) * 100.0
-                delta_min = (abs(min(with_result) - min(without_result)) / min(without_result)) * 100.0
-                delta_max = (abs(max(with_result) - max(without_result)) / max(without_result)) * 100.0
-
-                if min(with_result) < min(without_result):
-                    fdelta_min = Text(f"{min(with_result):.3f} ({delta_min:.1f}%)", style="green")
-                else:
-                    fdelta_min = Text(f"{min(with_result):.3f} (-{delta_min:.1f}%)", style="red")
-
-                if max(with_result) < max(without_result):
-                    fdelta_max = Text(f"{max(with_result):.3f} ({delta_max:.1f}%)", style="green")
-                else:
-                    fdelta_max = Text(f"{max(with_result):.3f} (-{delta_max:.1f}%)", style="red")
-
-                if fmean(with_result) < fmean(without_result):
-                    fdelta_mean = Text(f"{fmean(with_result):.3f} ({delta_mean:.1f}%)", style="green")
-                else:
-                    fdelta_mean = Text(f"{fmean(with_result):.3f} (-{delta_mean:.1f}%)", style="red")
-
-                table.add_row(str(n),
-                              desc,
-                              str(TIMES * REPEAT),
-                              "{:.3f}".format(min(without_result)),
-                              "{:.3f}".format(max(without_result)),
-                              "{:.3f}".format(fmean(without_result)),
-                              fdelta_min,
-                              fdelta_max,
-                              fdelta_mean,
-                              )
-
-    console = Console(width=150)
-    console.print(table)
+"""
+A benchmark suite for Performance Anti-Patterns
+"""
+import timeit
+import pathlib
+import sys
+from statistics import fmean
+from rich.console import Console
+from rich.table import Table
+from rich.text import Text
+
+REPEAT = 5
+TIMES = 5
+
+
+def format_delta(result: float, comparator: float, delta: float) -> Text:
+    """
+    Color the column a shade of green if the result was faster than the comparator,
+    red if it was slower, and format the string appropriately with %, minus symbol, etc.
+    """
+    minus = ""
+    if abs(delta) > 100:
+        formatter = "{0:.5f} ({1}{2:.1f}%)"
+    else:
+        formatter = "{0:.7f} ({1}{2:.1f}%)"
+    if result < comparator:
+        if delta < 10:
+            col = "medium_spring_green"
+        elif 10 <= delta < 20:
+            col = "spring_green1"
+        elif 20 <= delta < 40:
+            col = "spring_green2"
+        else:
+            col = "green1"
+        return Text(formatter.format(result, minus, delta), style=col)
+    else:
+        minus = "-"
+        return Text(formatter.format(result, minus, delta), style="red")
+
+
+if __name__ == "__main__":
+    table = Table(
+        title=f"Anti-Pattern Benchmark Suite, repeat={REPEAT}, number={TIMES}"
+    )
+
+    table.add_column("Benchmark", justify="right", style="cyan", no_wrap=True)
+    table.add_column("Min", width=10)
+    table.add_column("Max", width=10)
+    table.add_column("Mean", width=10)
+    table.add_column("Min (+)", style="blue", width=21)
+    table.add_column("Max (+)", style="blue", width=21)
+    table.add_column("Mean (+)", style="blue", width=21)
+
+    profiles_out = pathlib.Path(__file__).parent / "profiles"
+    if not profiles_out.exists():
+        profiles_out.mkdir()
+    n = 0
+
+    for f in pathlib.Path(__file__).parent.glob("bench_*.py"):
+        if len(sys.argv) > 1 and f.stem != f"bench_{sys.argv[1]}":
+            continue
+        i = __import__(
+            f.stem,
+            globals(),
+            locals(),
+        )
+        if hasattr(i, "__benchmarks__"):
+            for benchmark in i.__benchmarks__:
+                n += 1
+                func1, func2, desc = benchmark
+                comparator_result = timeit.repeat(func1, repeat=REPEAT, number=TIMES)
+                result = timeit.repeat(func2, repeat=REPEAT, number=TIMES)
+
+                delta_mean = (
+                    abs(fmean(result) - fmean(comparator_result))
+                    / fmean(comparator_result)
+                ) * 100.0
+                delta_min = (
+                    abs(min(result) - min(comparator_result)) / min(comparator_result)
+                ) * 100.0
+                delta_max = (
+                    abs(max(result) - max(comparator_result)) / max(comparator_result)
+                ) * 100.0
+
+                fdelta_min = format_delta(
+                    min(result), min(comparator_result), delta_min
+                )
+                fdelta_max = format_delta(
+                    max(result), max(comparator_result), delta_max
+                )
+                fdelta_mean = format_delta(
+                    fmean(result), fmean(comparator_result), delta_mean
+                )
+
+                table.add_row(
+                    desc,
+                    "{:.7f}".format(min(comparator_result)),
+                    "{:.7f}".format(max(comparator_result)),
+                    "{:.7f}".format(fmean(comparator_result)),
+                    fdelta_min,
+                    fdelta_max,
+                    fdelta_mean,
+                )
+
+    console = Console(width=150)
+    console.print(table)
Author	SHA1	Message	Date
Zev Averbach	d6b3bc7429	added two new benchmarks for one new comparator. changed some variable names and string formatters for greater clarity, added a docstring for the same purpose	2022-06-29 15:47:13 +02:00
Anthony Shaw	d1e4a387c6	Merge pull request #5 from pamelafox/patch-2 Add a benchmark for regex related practices	2022-05-28 16:41:50 +10:00
Pamela Fox	a1727824f0	Add a benchmark for regex related practices	2022-05-24 23:12:27 -07:00
Anthony Shaw	b56a7d1ce8	Merge pull request #4 from pamelafox/patch-1 Add benchmark comparing join of gen expression vs join of list comp	2022-05-25 15:06:08 +10:00
Pamela Fox	fe1b482fba	Add benchmark comparing join of generator expression vs join of list comprehension	2022-05-24 21:58:00 -07:00
Anthony Shaw	2b8c41b589	Correct the output time	2022-05-17 14:26:10 +10:00
Anthony Shaw	41199a18ab	Add class matching	2022-05-17 14:14:21 +10:00
Anthony Shaw	0caf235ab5	Add mapping benchmarks	2022-05-16 13:28:28 +10:00
Anthony Shaw	55e2b6df8a	Positive and negative match statements	2022-05-16 12:56:09 +10:00
Anthony Shaw	6d27822648	Refactor a simpler test script	2022-05-16 12:42:15 +10:00