Module hebi.optimize.optimize_varlen

Expand source code
from ast import *
from copy import copy
from collections import defaultdict

from ..util import CompilingNodeTransformer, CompilingNodeVisitor

"""
Rewrites all variable names to a minimal length equivalent
"""


class NameCollector(CompilingNodeVisitor):
    """Count how often each identifier occurs in the visited tree.

    After a visit, ``vars`` maps every encountered name (variable
    references, class names, function names and function argument names)
    to the number of times it was seen.
    """

    step = "Collecting occuring variable names"

    def __init__(self):
        # name -> occurrence count; names not seen yet start at 0
        self.vars = defaultdict(int)

    def visit_Name(self, node: Name) -> None:
        """Record one occurrence of the referenced identifier."""
        counts = self.vars
        counts[node.id] += 1

    def visit_ClassDef(self, node: ClassDef):
        """Record the class name; the class body is not traversed."""
        # ignore the content (i.e. attribute names) of class definitions
        counts = self.vars
        counts[node.name] += 1

    def visit_FunctionDef(self, node: FunctionDef):
        """Record the function name and its argument names, then visit the body."""
        counts = self.vars
        counts[node.name] += 1
        # only the argument names are counted; type hints are ignored
        for argument in node.args.args:
            counts[argument.arg] += 1
        for statement in node.body:
            self.visit(statement)


def bs_from_int(i: int):
    """Return the bytes spelled out by the hexadecimal representation of ``i``.

    The most significant byte comes first, and ``0`` yields ``b"\\x00"``.
    """
    digits = format(i, "x")
    # bytes.fromhex needs an even number of hex digits, so an odd-length
    # representation gets a single leading zero
    padding = "0" * (len(digits) % 2)
    return bytes.fromhex(padding + digits)


class OptimizeVarlen(CompilingNodeTransformer):
    """Replace every collected name by a short byte string.

    Names are ranked by how often they occur; the rank of a name is
    encoded with ``bs_from_int`` and used as its replacement. The original
    name is kept on each rewritten node in an ``orig_*`` attribute.
    """

    step = "Reducing the length of variable names"

    # original name -> replacement bytes; populated by visit_Module
    varmap = None

    def visit_Module(self, node: Module) -> Module:
        """Build the renaming table for the module and rewrite its body."""
        # collect all variable names
        counter = NameCollector()
        counter.visit(node)
        # sort by most used
        ranked = sorted(counter.vars.items(), key=lambda item: item[1], reverse=True)
        self.varmap = {
            name: bs_from_int(rank) for rank, (name, _) in enumerate(ranked)
        }
        module = copy(node)
        module.body = [self.visit(statement) for statement in node.body]
        return module

    def visit_Name(self, node: Name) -> Name:
        """Return a copy of the name node carrying the shortened identifier."""
        renamed = copy(node)
        renamed.orig_id, renamed.id = node.id, self.varmap[node.id]
        return renamed

    def visit_ClassDef(self, node: ClassDef) -> ClassDef:
        """Return a copy of the class definition with only its name shortened."""
        renamed = copy(node)
        renamed.orig_name, renamed.name = node.name, self.varmap[node.name]
        # ignore the content of class definitions
        return renamed

    def visit_FunctionDef(self, node: FunctionDef) -> FunctionDef:
        """Return a copy of the function with name, arguments and body rewritten."""
        renamed = copy(node)
        renamed.orig_name, renamed.name = node.name, self.varmap[node.name]
        # rebuild the argument list from copies so the input tree stays untouched
        short_args = []
        for original in node.args.args:
            short = copy(original)
            short.orig_arg, short.arg = original.arg, self.varmap[original.arg]
            short_args.append(short)
        renamed.args = copy(node.args)
        renamed.args.args = short_args
        renamed.body = [self.visit(statement) for statement in node.body]
        return renamed

Functions

def bs_from_int(i: int)
Expand source code
def bs_from_int(i: int):
    """Return the bytes spelled out by the hexadecimal representation of ``i``.

    The most significant byte comes first, and ``0`` yields ``b"\\x00"``.
    """
    digits = format(i, "x")
    # bytes.fromhex needs an even number of hex digits, so an odd-length
    # representation gets a single leading zero
    padding = "0" * (len(digits) % 2)
    return bytes.fromhex(padding + digits)

Classes

class NameCollector

A node visitor base class that walks the abstract syntax tree and calls a visitor function for every node found. This function may return a value which is forwarded by the visit method.

This class is meant to be subclassed, with the subclass adding visitor methods.

Per default the visitor functions for the nodes are 'visit_' + class name of the node. So a TryFinally node visit function would be visit_TryFinally. This behavior can be changed by overriding the visit method. If no visitor function exists for a node (return value None) the generic_visit visitor is used instead.

Don't use the NodeVisitor if you want to apply changes to nodes during traversing. For this a special visitor exists (NodeTransformer) that allows modifications.

Expand source code
class NameCollector(CompilingNodeVisitor):
    """Count how often each identifier occurs in the visited tree.

    After a visit, ``vars`` maps every encountered name (variable
    references, class names, function names and function argument names)
    to the number of times it was seen.
    """

    step = "Collecting occuring variable names"

    def __init__(self):
        # name -> occurrence count; names not seen yet start at 0
        self.vars = defaultdict(int)

    def visit_Name(self, node: Name) -> None:
        """Record one occurrence of the referenced identifier."""
        counts = self.vars
        counts[node.id] += 1

    def visit_ClassDef(self, node: ClassDef):
        """Record the class name; the class body is not traversed."""
        # ignore the content (i.e. attribute names) of class definitions
        counts = self.vars
        counts[node.name] += 1

    def visit_FunctionDef(self, node: FunctionDef):
        """Record the function name and its argument names, then visit the body."""
        counts = self.vars
        counts[node.name] += 1
        # only the argument names are counted; type hints are ignored
        for argument in node.args.args:
            counts[argument.arg] += 1
        for statement in node.body:
            self.visit(statement)

Ancestors

Class variables

var step

Methods

def visit(self, node)

Inherited from: CompilingNodeVisitor.visit

Visit a node.

def visit_ClassDef(self, node: _ast.ClassDef)
Expand source code
def visit_ClassDef(self, node: ClassDef):
    """Record the class name; the class body is not traversed."""
    # ignore the content (i.e. attribute names) of class definitions
    counts = self.vars
    counts[node.name] += 1
def visit_FunctionDef(self, node: _ast.FunctionDef)
Expand source code
def visit_FunctionDef(self, node: FunctionDef):
    """Record the function name and its argument names, then visit the body."""
    counts = self.vars
    counts[node.name] += 1
    # only the argument names are counted; type hints are ignored
    for argument in node.args.args:
        counts[argument.arg] += 1
    for statement in node.body:
        self.visit(statement)
def visit_Name(self, node: _ast.Name) ‑> None
Expand source code
def visit_Name(self, node: Name) -> None:
    """Record one occurrence of the referenced identifier."""
    counts = self.vars
    counts[node.id] += 1
class OptimizeVarlen

A NodeVisitor subclass that walks the abstract syntax tree and allows modification of nodes.

The NodeTransformer will walk the AST and use the return value of the visitor methods to replace or remove the old node. If the return value of the visitor method is None, the node will be removed from its location, otherwise it is replaced with the return value. The return value may be the original node in which case no replacement takes place.

Here is an example transformer that rewrites all occurrences of name lookups (foo) to data['foo']::

class RewriteName(NodeTransformer):

   # Replace each name node with a subscript into `data`, keyed by the
   # original identifier and keeping the node's expression context.
   def visit_Name(self, node):
       return Subscript(
           value=Name(id='data', ctx=Load()),
           slice=Index(value=Str(s=node.id)),
           ctx=node.ctx
       )

Keep in mind that if the node you're operating on has child nodes, you must either transform the child nodes yourself or call the generic_visit method for the node first.

For nodes that were part of a collection of statements (that applies to all statement nodes), the visitor may also return a list of nodes rather than just a single node.

Usually you use the transformer like this::

node = YourTransformer().visit(node)

Expand source code
class OptimizeVarlen(CompilingNodeTransformer):
    """Replace every collected name by a short byte string.

    Names are ranked by how often they occur; the rank of a name is
    encoded with ``bs_from_int`` and used as its replacement. The original
    name is kept on each rewritten node in an ``orig_*`` attribute.
    """

    step = "Reducing the length of variable names"

    # original name -> replacement bytes; populated by visit_Module
    varmap = None

    def visit_Module(self, node: Module) -> Module:
        """Build the renaming table for the module and rewrite its body."""
        # collect all variable names
        counter = NameCollector()
        counter.visit(node)
        # sort by most used
        ranked = sorted(counter.vars.items(), key=lambda item: item[1], reverse=True)
        self.varmap = {
            name: bs_from_int(rank) for rank, (name, _) in enumerate(ranked)
        }
        module = copy(node)
        module.body = [self.visit(statement) for statement in node.body]
        return module

    def visit_Name(self, node: Name) -> Name:
        """Return a copy of the name node carrying the shortened identifier."""
        renamed = copy(node)
        renamed.orig_id, renamed.id = node.id, self.varmap[node.id]
        return renamed

    def visit_ClassDef(self, node: ClassDef) -> ClassDef:
        """Return a copy of the class definition with only its name shortened."""
        renamed = copy(node)
        renamed.orig_name, renamed.name = node.name, self.varmap[node.name]
        # ignore the content of class definitions
        return renamed

    def visit_FunctionDef(self, node: FunctionDef) -> FunctionDef:
        """Return a copy of the function with name, arguments and body rewritten."""
        renamed = copy(node)
        renamed.orig_name, renamed.name = node.name, self.varmap[node.name]
        # rebuild the argument list from copies so the input tree stays untouched
        short_args = []
        for original in node.args.args:
            short = copy(original)
            short.orig_arg, short.arg = original.arg, self.varmap[original.arg]
            short_args.append(short)
        renamed.args = copy(node.args)
        renamed.args.args = short_args
        renamed.body = [self.visit(statement) for statement in node.body]
        return renamed

Ancestors

Class variables

var step
var varmap

Methods

def visit(self, node)

Inherited from: CompilingNodeTransformer.visit

Visit a node.

def visit_ClassDef(self, node: _ast.ClassDef) ‑> _ast.ClassDef
Expand source code
def visit_ClassDef(self, node: ClassDef) -> ClassDef:
    """Return a copy of the class definition with only its name shortened."""
    renamed = copy(node)
    renamed.orig_name, renamed.name = node.name, self.varmap[node.name]
    # ignore the content of class definitions
    return renamed
def visit_FunctionDef(self, node: _ast.FunctionDef) ‑> _ast.FunctionDef
Expand source code
def visit_FunctionDef(self, node: FunctionDef) -> FunctionDef:
    """Return a copy of the function with name, arguments and body rewritten."""
    renamed = copy(node)
    renamed.orig_name, renamed.name = node.name, self.varmap[node.name]
    # rebuild the argument list from copies so the input tree stays untouched
    short_args = []
    for original in node.args.args:
        short = copy(original)
        short.orig_arg, short.arg = original.arg, self.varmap[original.arg]
        short_args.append(short)
    renamed.args = copy(node.args)
    renamed.args.args = short_args
    renamed.body = [self.visit(statement) for statement in node.body]
    return renamed
def visit_Module(self, node: _ast.Module) ‑> _ast.Module
Expand source code
def visit_Module(self, node: Module) -> Module:
    """Build the renaming table for the module and rewrite its body."""
    # collect all variable names
    counter = NameCollector()
    counter.visit(node)
    # sort by most used
    ranked = sorted(counter.vars.items(), key=lambda item: item[1], reverse=True)
    self.varmap = {
        name: bs_from_int(rank) for rank, (name, _) in enumerate(ranked)
    }
    module = copy(node)
    module.body = [self.visit(statement) for statement in node.body]
    return module
def visit_Name(self, node: _ast.Name) ‑> _ast.Name
Expand source code
def visit_Name(self, node: Name) -> Name:
    """Return a copy of the name node carrying the shortened identifier."""
    renamed = copy(node)
    renamed.orig_id, renamed.id = node.id, self.varmap[node.id]
    return renamed