diff -r fb80df16c4ff -r 6a9ca0177020 BENCHMARKS.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BENCHMARKS.txt Tue May 20 11:04:02 2014 +0200 @@ -0,0 +1,435 @@ +Why registervm is faster +************************ + + + * NormalInstanceAttribute: + + - 40 ms => 21 ms (1.9x faster) + - fewer instructions: 381 => 81 + + * SimpleListManipulation: + + - 28 ms => 21 ms (1.3x faster) + - fewer instructions: 388 => 114 + + * StringPredicates: + + - 42 ms => 24 ms (1.8x faster) + - fewer instructions: 303 => 92 + + + * BuiltinMethodLookup (without moving LOAD_ATTR_REG): + + - 24 ms => 1 ms, 24x faster + - merge duplicate loads: 390 instructions => 22 + + * SpecialInstanceAttribute: + + - 40 ms => 21ms, 1.9x faster + - remove duplicate LOAD_ATTR_REG and useless instructions: + 381 instructions => 81 + + +changeset 3ea9aa5ee434 (Dec 14 2012): pybench +********************************************* + +------------------------------------------------------------------------------- +PYBENCH 2.1 +------------------------------------------------------------------------------- +* using CPython 3.4.0a0 (default:c5ae95ba2dec+, Dec 14 2012, 18:05:14) [GCC 4.6.3 20120306 (Red Hat 4.6.3-2)] +* disabled garbage collection +* system check interval set to maximum: 2147483647 +* using timer: time.perf_counter +* timer: resolution=1e-09, implementation=clock_gettime(CLOCK_MONOTONIC) + +------------------------------------------------------------------------------- +Benchmark: registervm.pybench +------------------------------------------------------------------------------- + + Rounds: 10 + Warp: 10 + Timer: time.perf_counter + + Machine Details: + Platform ID: Linux-3.6.2-1.fc16.x86_64-x86_64-with-fedora-16-Verne + Processor: x86_64 + + Python: + Implementation: CPython + Executable: /home/haypo/prog/python/registervm/python + Version: 3.4.0a0 + Compiler: GCC 4.6.3 20120306 (Red Hat 4.6.3-2) + Bits: 64bit + Build: Dec 14 2012 18:05:14 (#default:c5ae95ba2dec+) + Unicode: UCS4 + + +------------------------------------------------------------------------------- +Comparing with: original.pybench +------------------------------------------------------------------------------- + + Rounds: 10 + Warp: 10 + Timer: time.perf_counter + + Machine Details: + Platform ID: Linux-3.6.2-1.fc16.x86_64-x86_64-with-fedora-16-Verne + Processor: x86_64 + + Python: + Implementation: CPython + Executable: /home/haypo/prog/python/registervm/python + Version: 3.4.0a0 + Compiler: GCC 4.6.3 20120306 (Red Hat 4.6.3-2) + Bits: 64bit + Build: Dec 14 2012 18:05:14 (#default:c5ae95ba2dec+) + Unicode: UCS4 + + +Test minimum run-time average run-time + this other diff this other diff +------------------------------------------------------------------------------- + BuiltinFunctionCalls: 33ms 38ms -13.4% 34ms 39ms -14.1% + BuiltinMethodLookup: 2ms 24ms -93.4% 2ms 24ms -93.4% + CompareFloats: 24ms 28ms -14.8% 25ms 29ms -14.5% + CompareFloatsIntegers: 58ms 60ms -4.1% 58ms 61ms -5.4% + CompareIntegers: 38ms 44ms -12.5% 39ms 45ms -12.8% + CompareInternedStrings: 24ms 30ms -19.2% 25ms 30ms -18.8% + CompareLongs: 22ms 25ms -11.8% 23ms 26ms -12.4% + CompareStrings: 21ms 25ms -14.9% 22ms 26ms -15.4% + ComplexPythonFunctionCalls: 69ms 69ms -0.6% 70ms 71ms -1.5% + ConcatStrings: 35ms 37ms -6.2% 35ms 38ms -7.1% + CreateInstances: 57ms 57ms +0.9% 58ms 58ms -0.1% + CreateNewInstances: 42ms 43ms -0.4% 43ms 44ms -2.4% + CreateStringsWithConcat: 56ms 60ms -6.6% 56ms 61ms -7.1% + DictCreation: 46ms 47ms -2.7% 47ms 48ms -2.5% + DictWithFloatKeys: 32ms 37ms -12.8% 33ms 38ms -13.0% + DictWithIntegerKeys: 23ms 29ms -21.0% 24ms 30ms -20.5% + DictWithStringKeys: 20ms 26ms -24.3% 20ms 27ms -26.0% + ForLoops: 31ms 30ms +1.1% 31ms 31ms +1.4% + IfThenElse: 32ms 38ms -16.6% 32ms 39ms -16.6% + ListSlicing: 35ms 36ms -0.2% 36ms 36ms -0.4% + NestedForLoops: 44ms 43ms +2.3% 45ms 43ms +2.8% + NestedListComprehensions: 44ms 43ms +2.2% 45ms 44ms +1.6% + NormalClassAttribute: 57ms 74ms -22.8% 58ms 76ms -23.6% + NormalInstanceAttribute: 21ms 40ms -48.1% 21ms 42ms -49.3% + PythonFunctionCalls: 79ms 85ms -7.5% 79ms 87ms -8.9% + PythonMethodCalls: 70ms 80ms -13.4% 71ms 82ms -14.1% + Recursion: 201ms 206ms -2.6% 202ms 212ms -4.5% + SecondImport: 32ms 32ms -1.9% 33ms 33ms -1.5% + SecondPackageImport: 33ms 34ms -2.4% 34ms 35ms -2.4% + SecondSubmoduleImport: 74ms 79ms -6.6% 74ms 81ms -8.6% + SimpleComplexArithmetic: 23ms 28ms -17.8% 25ms 29ms -12.7% + SimpleDictManipulation: 96ms 100ms -4.0% 98ms 103ms -4.8% + SimpleFloatArithmetic: 21ms 28ms -24.5% 24ms 29ms -16.7% + SimpleIntFloatArithmetic: 26ms 35ms -24.3% 28ms 36ms -23.1% + SimpleIntegerArithmetic: 26ms 35ms -24.6% 28ms 36ms -22.7% + SimpleListComprehensions: 36ms 36ms +1.3% 38ms 37ms +1.0% + SimpleListManipulation: 21ms 28ms -26.0% 21ms 29ms -28.2% + SimpleLongArithmetic: 19ms 24ms -22.3% 19ms 25ms -22.7% + SmallLists: 33ms 40ms -16.5% 34ms 41ms -17.6% + SmallTuples: 38ms 48ms -21.1% 38ms 49ms -22.5% + SpecialClassAttribute: 72ms 112ms -35.7% 73ms 115ms -36.5% + SpecialInstanceAttribute: 21ms 40ms -48.3% 21ms 41ms -48.6% + StringMappings: 72ms 79ms -9.2% 73ms 82ms -11.1% + StringPredicates: 24ms 42ms -43.2% 24ms 43ms -43.1% + StringSlicing: 40ms 44ms -8.1% 41ms 45ms -8.6% + TryExcept: 25ms 25ms +1.1% 26ms 26ms -0.1% + TryFinally: 80ms 79ms +0.1% 80ms 82ms -1.8% + TryRaiseExcept: 16ms 16ms -0.0% 16ms 17ms -1.0% + TupleSlicing: 45ms 45ms -0.3% 47ms 47ms +1.0% + WithFinally: 94ms 94ms +0.8% 96ms 97ms -0.8% + WithRaiseExcept: 69ms 68ms +0.7% 69ms 70ms -0.5% +------------------------------------------------------------------------------- +Totals: 2250ms 2545ms -11.6% 2292ms 2612ms -12.3% + + +changeset 3ea9aa5ee434 (Dec 14 2012): use_regs.py +************************************************* + +Bench fact_loop +Stack: 25.4 ms +Register: 25.1 ms (-1.0%) +Bench fact +Stack: 59.3 ms +Register: 61.1 ms (+3.1%) +Bench pow2 +Stack: 81.5 ms +Register: 74.1 ms (-9.1%) +Bench loop_sum +Stack: 27.3 ms +Register: 23.1 ms (-15.6%) +Bench loop_noop +Stack: 99.6 ms +Register: 96.1 ms (-3.5%) +Bench sieve +Stack: 43.7 ms +Register: 43.4 ms (-0.7%) +Bench bisect +Stack: 38.7 ms +Register: 34.5 ms (-10.9%) +Bench qsort2 +Stack: 124.9 ms +Register: 124.5 ms (-0.4%) + + +changeset 3dbbc9b29d97 (2012-11-28) +*********************************** + +------------------------------------------------------------------------------- +PYBENCH 2.1 +------------------------------------------------------------------------------- +* using CPython 3.4.0a0 (default:fd7efb07fe0d+, Nov 28 2012, 00:24:25) [GCC 4.6.3 20120306 (Red Hat 4.6.3-2)] +* disabled garbage collection +* system check interval set to maximum: 2147483647 +* using timer: time.perf_counter +* timer: resolution=1e-09, implementation=clock_gettime(CLOCK_MONOTONIC) + +------------------------------------------------------------------------------- +Benchmark: pybench-registervm +------------------------------------------------------------------------------- + + Rounds: 10 + Warp: 10 + Timer: time.perf_counter + + Machine Details: + Platform ID: Linux-3.6.2-1.fc16.x86_64-x86_64-with-fedora-16-Verne + Processor: x86_64 + + Python: + Implementation: CPython + Executable: /home/haypo/prog/python/registervm/python + Version: 3.4.0a0 + Compiler: GCC 4.6.3 20120306 (Red Hat 4.6.3-2) + Bits: 64bit + Build: Nov 28 2012 00:24:25 (#default:fd7efb07fe0d+) + Unicode: UCS4 + + +------------------------------------------------------------------------------- +Comparing with: pybench-orig +------------------------------------------------------------------------------- + + Rounds: 10 + Warp: 10 + Timer: time.perf_counter + + Machine Details: + Platform ID: Linux-3.6.2-1.fc16.x86_64-x86_64-with-fedora-16-Verne + Processor: x86_64 + + Python: + Implementation: CPython + Executable: /home/haypo/prog/python/registervm/python + Version: 3.4.0a0 + Compiler: GCC 4.6.3 20120306 (Red Hat 4.6.3-2) + Bits: 64bit + Build: Nov 28 2012 00:24:25 (#default:fd7efb07fe0d+) + Unicode: UCS4 + + +Test minimum run-time average run-time + this other diff this other diff +------------------------------------------------------------------------------- + BuiltinFunctionCalls: 33ms 39ms -14.4% 34ms 39ms -13.3% + BuiltinMethodLookup: 2ms 23ms -93.2% 2ms 24ms -93.1% + CompareFloats: 25ms 29ms -14.0% 25ms 29ms -12.9% + CompareFloatsIntegers: 55ms 58ms -5.7% 55ms 58ms -5.3% + CompareIntegers: 38ms 44ms -12.7% 39ms 44ms -11.6% + CompareInternedStrings: 25ms 30ms -15.2% 26ms 30ms -14.2% + CompareLongs: 22ms 26ms -12.9% 23ms 26ms -12.3% + CompareStrings: 21ms 26ms -18.2% 22ms 27ms -17.5% + ComplexPythonFunctionCalls: 72ms 71ms +1.2% 73ms 72ms +1.4% + ConcatStrings: 34ms 37ms -6.5% 35ms 37ms -4.2% + CreateInstances: 55ms 59ms -6.3% 56ms 60ms -6.6% + CreateNewInstances: 42ms 43ms -2.6% 43ms 44ms -2.7% + CreateStringsWithConcat: 57ms 60ms -5.6% 67ms 61ms +11.2% + DictCreation: 44ms 46ms -5.4% 46ms 47ms -2.1% + DictWithFloatKeys: 34ms 38ms -9.4% 35ms 38ms -8.2% + DictWithIntegerKeys: 24ms 30ms -21.1% 25ms 32ms -23.2% + DictWithStringKeys: 20ms 26ms -20.3% 22ms 26ms -15.3% + ForLoops: 31ms 30ms +4.8% 33ms 30ms +10.4% + IfThenElse: 32ms 39ms -19.7% 32ms 40ms -18.0% + ListSlicing: 36ms 36ms +0.9% 36ms 36ms +1.4% + NestedForLoops: 44ms 47ms -6.0% 46ms 47ms -3.0% + NestedListComprehensions: 44ms 42ms +5.1% 45ms 43ms +5.0% + NormalClassAttribute: 55ms 75ms -26.2% 56ms 75ms -25.5% + NormalInstanceAttribute: 20ms 39ms -48.9% 20ms 39ms -48.4% + PythonFunctionCalls: 80ms 88ms -8.8% 81ms 89ms -9.0% + PythonMethodCalls: 70ms 81ms -13.2% 71ms 82ms -12.8% + Recursion: 203ms 205ms -0.8% 206ms 206ms -0.2% + SecondImport: 32ms 33ms -3.2% 32ms 33ms -2.7% + SecondPackageImport: 34ms 34ms -0.4% 35ms 35ms -0.2% + SecondSubmoduleImport: 72ms 77ms -6.8% 74ms 78ms -5.2% + SimpleComplexArithmetic: 22ms 28ms -21.7% 23ms 29ms -18.7% + SimpleDictManipulation: 96ms 99ms -3.1% 97ms 100ms -2.7% + SimpleFloatArithmetic: 21ms 27ms -24.1% 24ms 28ms -13.9% + SimpleIntFloatArithmetic: 28ms 35ms -20.6% 28ms 35ms -19.4% + SimpleIntegerArithmetic: 28ms 35ms -19.5% 28ms 35ms -19.0% + SimpleListComprehensions: 37ms 35ms +3.8% 38ms 36ms +4.7% + SimpleListManipulation: 21ms 28ms -26.5% 21ms 28ms -25.3% + SimpleLongArithmetic: 19ms 23ms -16.5% 20ms 23ms -12.9% + SmallLists: 32ms 39ms -17.7% 33ms 40ms -15.9% + SmallTuples: 38ms 46ms -17.4% 38ms 46ms -16.6% + SpecialClassAttribute: 71ms 112ms -36.0% 72ms 112ms -35.6% + SpecialInstanceAttribute: 20ms 40ms -50.1% 20ms 40ms -49.1% + StringMappings: 73ms 80ms -8.2% 75ms 80ms -7.0% + StringPredicates: 25ms 44ms -43.7% 25ms 44ms -42.7% + StringSlicing: 40ms 43ms -7.7% 40ms 43ms -6.5% + TryExcept: 25ms 25ms +0.9% 25ms 25ms +1.9% + TryFinally: 81ms 80ms +0.8% 82ms 81ms +1.0% + TryRaiseExcept: 17ms 16ms +3.9% 17ms 16ms +3.7% + TupleSlicing: 48ms 48ms -0.4% 49ms 49ms -0.6% + WithFinally: 94ms 93ms +1.0% 95ms 94ms +1.5% + WithRaiseExcept: 68ms 67ms +1.2% 69ms 67ms +2.4% +------------------------------------------------------------------------------- +Totals: 2261ms 2553ms -11.5% 2317ms 2578ms -10.1% + +(this=pybench-registervm, other=pybench-orig) + +changeset bd0f0345d87f (Nov 23 2012): use_regs.py +************************************************* + +Bench fact_loop +Stack: 25.8 ms +Register: 23.9 ms (-7.5%) +Bench fact +Stack: 59.9 ms +Register: 61.3 ms (+2.5%) +Bench pow2 +Stack: 86.2 ms +Register: 75.6 ms (-12.2%) +Bench loop_total +Stack: 27.0 ms +Register: 24.2 ms (-10.6%) +Bench loop_noop +Stack: 105.4 ms +Register: 106.6 ms (+1.1%) +Bench sieve +Stack: 44.7 ms +Register: 44.6 ms (-0.1%) +Bench bisect +Stack: 38.3 ms +Register: 34.0 ms (-11.3%) +Bench qsort2 +Stack: 140.1 ms +Register: 144.7 ms (+3.2%) + + +changeset bd0f0345d87f (Nov 23 2012): pybench +********************************************* + +------------------------------------------------------------------------------- +PYBENCH 2.1 +------------------------------------------------------------------------------- +* using CPython 3.4.0a0 (default:4b41e6685a24+, Nov 23 2012, 20:58:33) [GCC 4.6.3 20120306 (Red Hat 4.6.3-2)] +* disabled garbage collection +* system check interval set to maximum: 2147483647 +* using timer: time.perf_counter +* timer: resolution=1e-09, implementation=clock_gettime(CLOCK_MONOTONIC) + +------------------------------------------------------------------------------- +Benchmark: pybench-registervm +------------------------------------------------------------------------------- + + Rounds: 10 + Warp: 10 + Timer: time.perf_counter + + Machine Details: + Platform ID: Linux-3.6.2-1.fc16.x86_64-x86_64-with-fedora-16-Verne + Processor: x86_64 + + Python: + Implementation: CPython + Executable: /home/haypo/prog/python/registervm/python + Version: 3.4.0a0 + Compiler: GCC 4.6.3 20120306 (Red Hat 4.6.3-2) + Bits: 64bit + Build: Nov 23 2012 20:58:33 (#default:4b41e6685a24+) + Unicode: UCS4 + + +------------------------------------------------------------------------------- +Comparing with: pybench-orig +------------------------------------------------------------------------------- + + Rounds: 10 + Warp: 10 + Timer: time.perf_counter + + Machine Details: + Platform ID: Linux-3.6.2-1.fc16.x86_64-x86_64-with-fedora-16-Verne + Processor: x86_64 + + Python: + Implementation: CPython + Executable: /home/haypo/prog/python/registervm/python + Version: 3.4.0a0 + Compiler: GCC 4.6.3 20120306 (Red Hat 4.6.3-2) + Bits: 64bit + Build: Nov 23 2012 20:58:33 (#default:4b41e6685a24+) + Unicode: UCS4 + + +Test minimum run-time average run-time + this other diff this other diff +------------------------------------------------------------------------------- + BuiltinFunctionCalls: 33ms 39ms -16.0% 34ms 40ms -15.3% + BuiltinMethodLookup: 1ms 23ms -93.7% 2ms 24ms -93.8% + CompareFloats: 24ms 28ms -13.2% 25ms 29ms -11.9% + CompareFloatsIntegers: 55ms 59ms -6.2% 56ms 60ms -5.8% + CompareIntegers: 38ms 44ms -14.8% 39ms 45ms -12.4% + CompareInternedStrings: 24ms 30ms -18.3% 25ms 30ms -15.7% + CompareLongs: 22ms 26ms -13.8% 23ms 26ms -12.0% + CompareStrings: 21ms 25ms -17.0% 22ms 26ms -15.4% + ComplexPythonFunctionCalls: 67ms 71ms -5.1% 68ms 72ms -5.3% + ConcatStrings: 29ms 29ms +1.8% 29ms 29ms +2.4% + CreateInstances: 54ms 56ms -3.6% 55ms 58ms -3.8% + CreateNewInstances: 41ms 42ms -2.3% 41ms 43ms -3.2% + CreateStringsWithConcat: 59ms 62ms -4.3% 60ms 62ms -3.6% + DictCreation: 46ms 47ms -2.6% 47ms 48ms -1.5% + DictWithFloatKeys: 33ms 39ms -13.6% 34ms 39ms -12.6% + DictWithIntegerKeys: 23ms 30ms -23.6% 24ms 31ms -24.5% + DictWithStringKeys: 21ms 26ms -20.5% 21ms 26ms -20.5% + ForLoops: 34ms 30ms +10.4% 34ms 31ms +12.1% + IfThenElse: 32ms 38ms -16.3% 32ms 38ms -15.4% + ListSlicing: 36ms 36ms -0.3% 36ms 36ms -0.6% + NestedForLoops: 44ms 43ms +1.0% 44ms 44ms +1.7% + NestedListComprehensions: 48ms 45ms +7.6% 49ms 45ms +8.1% + NormalClassAttribute: 61ms 82ms -25.1% 62ms 83ms -25.1% + NormalInstanceAttribute: 20ms 38ms -47.5% 20ms 39ms -48.0% + PythonFunctionCalls: 79ms 86ms -8.7% 80ms 87ms -7.5% + PythonMethodCalls: 70ms 82ms -15.3% 71ms 83ms -14.4% + Recursion: 202ms 205ms -1.3% 205ms 206ms -0.7% + SecondImport: 32ms 33ms -4.7% 33ms 34ms -3.6% + SecondPackageImport: 33ms 35ms -4.7% 34ms 35ms -3.8% + SecondSubmoduleImport: 74ms 77ms -4.4% 76ms 80ms -4.9% + SimpleComplexArithmetic: 23ms 29ms -19.9% 23ms 29ms -19.5% + SimpleDictManipulation: 95ms 102ms -6.7% 96ms 103ms -6.2% + SimpleFloatArithmetic: 24ms 28ms -15.8% 24ms 29ms -16.0% + SimpleIntFloatArithmetic: 28ms 33ms -13.6% 29ms 33ms -13.2% + SimpleIntegerArithmetic: 28ms 33ms -15.5% 29ms 34ms -15.1% + SimpleListComprehensions: 37ms 36ms +3.7% 38ms 37ms +3.8% + SimpleListManipulation: 20ms 28ms -29.4% 20ms 28ms -29.1% + SimpleLongArithmetic: 20ms 24ms -16.5% 21ms 24ms -14.8% + SmallLists: 33ms 40ms -18.2% 34ms 41ms -19.0% + SmallTuples: 37ms 46ms -19.8% 38ms 47ms -18.5% + SpecialClassAttribute: 79ms 116ms -31.6% 81ms 118ms -31.2% + SpecialInstanceAttribute: 20ms 38ms -47.9% 20ms 39ms -48.1% + StringMappings: 74ms 81ms -9.4% 75ms 82ms -8.4% + StringPredicates: 24ms 44ms -45.7% 24ms 45ms -45.3% + StringSlicing: 40ms 43ms -6.9% 41ms 44ms -6.9% + TryExcept: 26ms 26ms -0.3% 26ms 26ms +0.1% + TryFinally: 71ms 80ms -11.5% 71ms 81ms -11.4% + TryRaiseExcept: 16ms 16ms +0.8% 17ms 16ms +1.0% + TupleSlicing: 44ms 44ms +0.5% 46ms 47ms -3.3% + WithFinally: 93ms 93ms +0.1% 95ms 94ms +0.6% + WithRaiseExcept: 71ms 71ms +0.1% 72ms 71ms +1.4% +------------------------------------------------------------------------------- +Totals: 2260ms 2559ms -11.7% 2303ms 2595ms -11.3% + +(this=pybench-registervm, other=pybench-orig) + + diff -r fb80df16c4ff -r 6a9ca0177020 Include/frameobject.h --- a/Include/frameobject.h Tue Oct 23 21:14:34 2012 +0300 +++ b/Include/frameobject.h Tue May 20 11:04:02 2014 +0200 @@ -47,9 +47,11 @@ int f_lineno; /* Current line number */ int f_iblock; /* index in f_blockstack */ PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */ + PyObject **f_registers; /* points after the first register */ PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */ } PyFrameObject; +#define FRAME_NREGISTER 256 /* Standard object interface */ diff -r fb80df16c4ff -r 6a9ca0177020 Include/opcode.h --- a/Include/opcode.h Tue Oct 23 21:14:34 2012 +0300 +++ b/Include/opcode.h Tue May 20 11:04:02 2014 +0200 @@ -121,9 +121,9 @@ #define MAKE_CLOSURE 134 /* same as MAKE_FUNCTION */ #define LOAD_CLOSURE 135 /* Load free variable from closure */ -#define LOAD_DEREF 136 /* Load and dereference from closure cell */ -#define STORE_DEREF 137 /* Store into cell */ -#define DELETE_DEREF 138 /* Delete closure cell */ +#define LOAD_DEREF 136 /* Load and dereference from closure cell */ +#define STORE_DEREF 137 /* Store into cell */ +#define DELETE_DEREF 138 /* Delete closure cell */ /* The next 3 opcodes must be contiguous and satisfy (CALL_FUNCTION_VAR - CALL_FUNCTION) & 3 == 1 */ @@ -140,6 +140,66 @@ #define SET_ADD 146 #define MAP_ADD 147 +#define USE_REGISTERS 148 /* Opcodes from here use registers: */ + +#define UNARY_NOT_REG 148 +#define LOAD_CONST_REG 150 /* Index in const list */ +#define RETURN_VALUE_REG 151 +#define BINARY_ADD_REG 152 +#define POP_REG 153 +#define MOVE_REG 154 +#define PUSH_REG 155 +#define BINARY_MULTIPLY_REG 156 +#define LOAD_GLOBAL_REG 157 +#define BINARY_SUBTRACT_REG 158 +#define FOR_ITER_REG 159 +#define COMPARE_REG 160 /* Comparison operator */ +#define JUMP_IF_FALSE_REG 161 +#define CALL_FUNCTION_REG 162 +#define GET_ITER_REG 163 +#define BINARY_SUBSCR_REG 164 +#define LOAD_ATTR_REG 165 +#define BINARY_FLOOR_DIVIDE_REG 166 +#define JUMP_IF_TRUE_REG 167 +#define UNARY_NEGATIVE_REG 168 +#define BUILD_SLICE2_REG 169 +#define BUILD_LIST_REG 170 +#define UNPACK_SEQUENCE_REG 171 +#define BUILD_TUPLE_REG 172 +#define INPLACE_ADD_REG 173 +#define INPLACE_MULTIPLY_REG 174 +#define INPLACE_SUBTRACT_REG 175 +#define INPLACE_FLOOR_DIVIDE_REG 176 +#define STORE_GLOBAL_REG 177 +#define STORE_ATTR_REG 178 +#define BUILD_MAP_REG 179 +#define STORE_MAP_REG 180 +#define MAKE_FUNCTION_REG 181 +#define LOAD_BUILD_CLASS_REG 182 +#define CALL_PROCEDURE_REG 183 +#define CLEAR_REG 184 +#define MAKE_CLOSURE_REG 185 +#define LOAD_CLOSURE_REG 186 +#define STORE_DEREF_REG 187 +#define DELETE_SUBSCR_REG 188 +#define LOAD_DEREF_REG 189 +#define BINARY_MODULO_REG 190 +#define STORE_SUBSCR_REG 191 +#define BINARY_TRUE_DIVIDE_REG 192 +#define INPLACE_TRUE_DIVIDE_REG 193 +#define INPLACE_MODULO_REG 194 +#define LOAD_NAME_REG 195 +#define LIST_APPEND_REG 196 +#define STORE_NAME_REG 197 +#define IMPORT_NAME_REG 198 +#define IMPORT_FROM_REG 199 +#define IMPORT_STAR_REG 200 +#define YIELD_REG 201 +#define STORE_LOCALS_REG 202 +#define UNARY_POSITIVE_REG 203 +#define MAP_ADD_REG 204 +#define BUILD_SLICE3_REG 205 +#define UNPACK_EX_REG 206 /* EXCEPT_HANDLER is a special, implicit block type which is created when entering an except handler. It is not an opcode but we define it here @@ -151,7 +211,7 @@ enum cmp_op {PyCmp_LT=Py_LT, PyCmp_LE=Py_LE, PyCmp_EQ=Py_EQ, PyCmp_NE=Py_NE, PyCmp_GT=Py_GT, PyCmp_GE=Py_GE, PyCmp_IN, PyCmp_NOT_IN, PyCmp_IS, PyCmp_IS_NOT, PyCmp_EXC_MATCH, PyCmp_BAD}; -#define HAS_ARG(op) ((op) >= HAVE_ARGUMENT) +#define HAS_ARG(op) ((op) >= HAVE_ARGUMENT && (op) < USE_REGISTERS) #ifdef __cplusplus } diff -r fb80df16c4ff -r 6a9ca0177020 Lib/dis.py --- a/Lib/dis.py Tue Oct 23 21:14:34 2012 +0300 +++ b/Lib/dis.py Tue May 20 11:04:02 2014 +0200 @@ -144,6 +144,35 @@ """Print details of methods, functions, or code to stdout.""" print(code_info(co)) +def format_arg(co, arg_type, oparg, i): + text = repr(oparg) + if arg_type == 'const': + text = '%r (const#%s)' % (co.co_consts[oparg], oparg) + elif arg_type == 'name': + text = "%r (name#%s)" % (co.co_names[oparg], oparg) + elif arg_type == 'jabs': + text = '' % text + elif arg_type == 'jrel': + text = '' % (i + oparg, oparg) + elif arg_type == 'local': + text += ' (' + co.co_varnames[oparg] + ')' + elif arg_type == 'cmp': + text = repr(cmp_op[oparg]) + elif arg_type == 'free': + free = co.co_cellvars + co.co_freevars + text += ' (' + free[oparg] + ')' + elif arg_type in ('nargs', 'call_nargs'): + text += ' (%d positional, %d keyword pair)' % (oparg & 0xff, oparg >> 8) + return text + +def format_reg(co, reg): + first_register = co.co_stacksize + co.co_nlocals + len(co.co_cellvars) + len(co.co_freevars) + 1 + if reg < first_register: + return repr(co.co_varnames[reg]) + else: + reg -= first_register + return 'R%s' % reg + def disassemble(co, lasti=-1): """Disassemble a code object.""" code = co.co_code @@ -155,6 +184,9 @@ free = None while i < n: op = code[i] + operation = OPERATION_BY_CODE[op] + name = opname[op] + if i in linestarts: if i > 0: print() @@ -169,30 +201,55 @@ print(repr(i).rjust(4), end=' ') print(opname[op].ljust(20), end=' ') i = i+1 - if op >= HAVE_ARGUMENT: - oparg = code[i] + code[i+1]*256 + extended_arg + + args = [] + arg = None + for arg_type in operation.arg_types: + if arg_type == 'reg': + reg = code[i] + code[i+1] * 256 + args.append(format_reg(co, reg)) + i += 2 + elif arg_type == 'nreg8': + nreg = code[i] + i += 1 + args.append(format_arg(co, arg_type, nreg, i)) + for ireg in range(nreg): + reg = code[i] + code[i+1] * 256 + args.append(format_reg(co, reg)) + i += 2 + elif arg_type == 'nkwreg8': + nreg = code[i] + i += 1 + args.append(format_arg(co, arg_type, nreg, i)) + for ireg in range(nreg * 2): + reg = code[i] + code[i+1] * 256 + args.append(format_reg(co, reg)) + i += 2 + else: + arg = code[i] + code[i+1] * 256 + extended_arg + i += 2 + if arg_type == 'nreg': + nreg = arg + elif arg_type == 'call_nreg': + na = arg & 0xff + nk = arg >> 8 + nreg = na + 2 * nk + else: + nreg = None + args.append(format_arg(co, arg_type, arg, i)) + if nreg is not None: + for ireg in range(nreg): + reg = code[i] + code[i+1] * 256 + args.append(format_reg(co, reg)) + i += 2 + + print(', '.join(args), end=' ') + + if op == EXTENDED_ARG: + extended_arg = arg * 65536 + else: extended_arg = 0 - i = i+2 - if op == EXTENDED_ARG: - extended_arg = oparg*65536 - print(repr(oparg).rjust(5), end=' ') - if op in hasconst: - print('(' + repr(co.co_consts[oparg]) + ')', end=' ') - elif op in hasname: - print('(' + co.co_names[oparg] + ')', end=' ') - elif op in hasjrel: - print('(to ' + repr(i + oparg) + ')', end=' ') - elif op in haslocal: - print('(' + co.co_varnames[oparg] + ')', end=' ') - elif op in hascompare: - print('(' + cmp_op[oparg] + ')', end=' ') - elif op in hasfree: - if free is None: - free = co.co_cellvars + co.co_freevars - print('(' + free[oparg] + ')', end=' ') - elif op in hasnargs: - print('(%d positional, %d keyword pair)' - % (code[i-2], code[i-1]), end=' ') + print() def _disassemble_bytes(code, lasti=-1, varnames=None, names=None, @@ -252,20 +309,58 @@ labels = [] n = len(code) i = 0 + extended_arg = 0 while i < n: op = code[i] i = i+1 - if op >= HAVE_ARGUMENT: - oparg = code[i] + code[i+1]*256 - i = i+2 - label = -1 - if op in hasjrel: - label = i+oparg - elif op in hasjabs: - label = oparg - if label >= 0: - if label not in labels: - labels.append(label) + operation = OPERATION_BY_CODE[op] + + arg = None + nreg = None + for arg_type in operation.arg_types: + if arg_type == 'reg': + reg = code[i] + code[i+1] * 256 + i += 2 + elif arg_type == 'nreg8': + nreg = code[i] + i += 1 + for ireg in range(nreg): + reg = code[i] + code[i+1] * 256 + i += 2 + elif arg_type == 'nkwreg8': + nreg = code[i] + i += 1 + for ireg in range(nreg * 2): + reg = code[i] + code[i+1] * 256 + i += 2 + else: + arg = code[i] + code[i+1] * 256 + extended_arg + i += 2 + nreg = None + label = None + if arg_type == 'nreg': + nreg = arg + elif arg_type == 'call_nreg': + na = arg & 0xff + nk = arg >> 8 + nreg = na + 2 * nk + elif arg_type == 'jabs': + label = arg + elif arg_type == 'jrel': + label = i + arg + if label is not None: + if label not in labels: + labels.append(label) + if nreg is not None: + for ireg in range(nreg): + reg = code[i] + code[i+1] * 256 + i += 2 + + if op == EXTENDED_ARG: + extended_arg = arg*65536 + else: + extended_arg = 0 + return labels def findlinestarts(code): diff -r fb80df16c4ff -r 6a9ca0177020 Lib/opcode.py --- a/Lib/opcode.py Tue Oct 23 21:14:34 2012 +0300 +++ b/Lib/opcode.py Tue May 20 11:04:02 2014 +0200 @@ -6,7 +6,8 @@ __all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", "haslocal", "hascompare", "hasfree", "opname", "opmap", - "HAVE_ARGUMENT", "EXTENDED_ARG", "hasnargs"] + "HAVE_ARGUMENT", "EXTENDED_ARG", "USE_REGISTERS", "hasnargs", + "OPERATION_BY_CODE", "OPERATION_BY_NAME"] cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', 'is not', 'exception match', 'BAD') @@ -25,21 +26,107 @@ for op in range(256): opname[op] = '<%r>' % (op,) del op +HAVE_ARGUMENT = 90 # Opcodes from here have an argument: +EXTENDED_ARG = 144 +USE_REGISTERS = 148 # Opcodes from here use registers: + +class Operation: + """ + Argument types: + + - 'imm': Immediate + - 'call_nreg': Number of arguments for CALL_FUNCTION_REG or CALL_PROCEDURE_REG + - 'cmp': Compare operator + - 'const': Constat + - 'free': Free variable + - 'jabs': Jump absolute + - 'jrel': Jump relative + - 'local': Local variable + - 'name': Name + - 'nargs': Number of index and keyword arguments for CALL_FUNCTION + - 'nreg': Number of registers + - 'nreg8': Number of registers, 8 bit unsigned number + - 'nkwreg8': Number of keyword pair (key-value) registers, 8 bit unsigned + number. For example, the value 1 means 1 pair and so 2 registers. + - 'reg': Register + """ + def __init__(self, name, code, args=None): + self.code = code + self.name = name + if args: + args = tuple(args.split(',')) + else: + args = () + + arg_names = [] + arg_types = [] + for arg in args: + arg_name, arg_type = arg.split(':', 1) + arg_name = arg_name.strip() + arg_type = arg_type.strip() + + if arg_name in arg_names: + raise ValueError("duplicate argument name: %s" % (arg_name,)) + arg_names.append(arg_name) + + if arg_type == 'name': + global hasname + hasname.append(self.code) + elif arg_type == 'jrel': + global hasjrel + hasjrel.append(self.code) + elif arg_type == 'jabs': + global hasjabs + hasjabs.append(self.code) + elif arg_type == 'free': + global hasfree + hasfree.append(self.code) + elif arg_type == 'nargs': + global hasnargs + hasnargs.append(self.code) + elif arg_type == 'const': + global hasconst + hasconst.append(self.code) + elif arg_type == 'local': + global haslocal + haslocal.append(self.code) + elif arg_type == 'cmp': + global hascompare + hascompare.append(self.code) + elif arg_type not in ('imm', 'reg', 'nreg', 'call_nreg', 'nreg8', 'nkwreg8'): + raise ValueError("invalid argument type: %r" % (arg_type,)) + + if arg_type == 'local': + arg_types.append('reg') + else: + arg_types.append(arg_type) + + self.arg_names = tuple(arg_names) + self.arg_types = tuple(arg_types) + + def __repr__(self): + return '' % self.name + +OPERATION_BY_NAME = {} +OPERATION_BY_CODE = {} + +def def_operation(operation): + opname[operation.code] = operation.name + opmap[operation.name] = operation.code + OPERATION_BY_NAME[operation.name] = operation + OPERATION_BY_CODE[operation.code] = operation + def def_op(name, op): - opname[op] = name - opmap[name] = op + def_operation(Operation(name, op)) def name_op(name, op): - def_op(name, op) - hasname.append(op) + def_operation(Operation(name, op, "arg:name")) def jrel_op(name, op): - def_op(name, op) - hasjrel.append(op) + def_operation(Operation(name, op, "arg:jrel")) def jabs_op(name, op): - def_op(name, op) - hasjabs.append(op) + def_operation(Operation(name, op, "arg:jabs")) # Instruction opcodes for compiled code # Blank lines correspond to available opcodes @@ -106,27 +193,23 @@ def_op('END_FINALLY', 88) def_op('POP_EXCEPT', 89) -HAVE_ARGUMENT = 90 # Opcodes from here have an argument: - name_op('STORE_NAME', 90) # Index in name list name_op('DELETE_NAME', 91) # "" -def_op('UNPACK_SEQUENCE', 92) # Number of tuple items +def_operation(Operation('UNPACK_SEQUENCE', 92, 'arg:imm')) # Number of tuple items jrel_op('FOR_ITER', 93) -def_op('UNPACK_EX', 94) +def_operation(Operation('UNPACK_EX', 94, 'arg:imm')) name_op('STORE_ATTR', 95) # Index in name list name_op('DELETE_ATTR', 96) # "" name_op('STORE_GLOBAL', 97) # "" name_op('DELETE_GLOBAL', 98) # "" -def_op('LOAD_CONST', 100) # Index in const list -hasconst.append(100) +def_operation(Operation('LOAD_CONST', 100, 'arg:const')) # Index in const list name_op('LOAD_NAME', 101) # Index in name list -def_op('BUILD_TUPLE', 102) # Number of tuple items -def_op('BUILD_LIST', 103) # Number of list items -def_op('BUILD_SET', 104) # Number of set items -def_op('BUILD_MAP', 105) # Number of dict entries (upto 255) +def_operation(Operation('BUILD_TUPLE', 102, 'arg:imm')) # Number of tuple items +def_operation(Operation('BUILD_LIST', 103, 'arg:imm')) # Number of list items +def_operation(Operation('BUILD_SET', 104, 'arg:imm')) # Number of set items +def_operation(Operation('BUILD_MAP', 105, 'arg:imm')) # Number of dict entries (upto 255) name_op('LOAD_ATTR', 106) # Index in name list -def_op('COMPARE_OP', 107) # Comparison operator -hascompare.append(107) +def_operation(Operation('COMPARE_OP', 107, 'arg:cmp')) # Comparison operator name_op('IMPORT_NAME', 108) # Index in name list name_op('IMPORT_FROM', 109) # Index in name list @@ -144,42 +227,104 @@ jrel_op('SETUP_EXCEPT', 121) # "" jrel_op('SETUP_FINALLY', 122) # "" -def_op('LOAD_FAST', 124) # Local variable number -haslocal.append(124) -def_op('STORE_FAST', 125) # Local variable number -haslocal.append(125) -def_op('DELETE_FAST', 126) # Local variable number -haslocal.append(126) +def_operation(Operation('LOAD_FAST', 124, 'arg:local')) # Local variable number +def_operation(Operation('STORE_FAST', 125, 'arg:local')) # Local variable number +def_operation(Operation('DELETE_FAST', 126, 'arg:local')) # Local variable number -def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3) -def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8) -hasnargs.append(131) -def_op('MAKE_FUNCTION', 132) # Number of args with default values -def_op('BUILD_SLICE', 133) # Number of items -def_op('MAKE_CLOSURE', 134) -def_op('LOAD_CLOSURE', 135) -hasfree.append(135) -def_op('LOAD_DEREF', 136) -hasfree.append(136) -def_op('STORE_DEREF', 137) -hasfree.append(137) -def_op('DELETE_DEREF', 138) -hasfree.append(138) +def_operation(Operation('RAISE_VARARGS', 130, 'arg:imm')) # Number of raise arguments (1, 2, or 3) +def_operation(Operation('CALL_FUNCTION', 131, 'arg:nargs')) # #args + (#kwargs << 8, 'nargs') +def_operation(Operation('MAKE_FUNCTION', 132, 'arg:nargs')) # Number of args with default values +def_operation(Operation('BUILD_SLICE', 133, 'arg:imm')) # Number of items +def_operation(Operation('MAKE_CLOSURE', 134, 'arg:imm')) +def_operation(Operation('LOAD_CLOSURE', 135, 'arg:free')) +def_operation(Operation('LOAD_DEREF', 136, 'arg:free')) +def_operation(Operation('STORE_DEREF', 137, 'arg:free')) +def_operation(Operation('DELETE_DEREF', 138, 'arg:free')) -def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8) -hasnargs.append(140) -def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8) -hasnargs.append(141) -def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8) -hasnargs.append(142) +def_operation(Operation('CALL_FUNCTION_VAR', 140, 'arg:nargs')) # #args + (#kwargs << 8) +def_operation(Operation('CALL_FUNCTION_KW', 141, 'arg:nargs')) # #args + (#kwargs << 8) +def_operation(Operation('CALL_FUNCTION_VAR_KW', 142, 'arg:nargs')) # #args + (#kwargs << 8) jrel_op('SETUP_WITH', 143) -def_op('LIST_APPEND', 145) -def_op('SET_ADD', 146) -def_op('MAP_ADD', 147) +def_operation(Operation('LIST_APPEND', 145, 'arg:imm')) +def_operation(Operation('SET_ADD', 146, 'arg:imm')) +def_operation(Operation('MAP_ADD', 147, 'arg:imm')) + +def_operation(Operation('EXTENDED_ARG', 144, 'arg:imm')) + +# Register instructions + +def_operation(Operation('PUSH_REG', 155, 'arg:reg')) +def_operation(Operation('POP_REG', 153, 'result:reg')) +def_operation(Operation('MOVE_REG', 154, 'result:reg, arg:reg')) +def_operation(Operation('CLEAR_REG', 184, 'arg:reg')) + +def_operation(Operation('LOAD_CONST_REG', 150, 'result:reg, const:const')) # Index in const list +def_operation(Operation('LOAD_GLOBAL_REG', 157, 'result:reg, global:name')) +def_operation(Operation('LOAD_CLOSURE_REG', 186, 'result:reg, closure:free')) +def_operation(Operation('LOAD_ATTR_REG', 165, 'result:reg, owner:reg, attr:name')) + +def_operation(Operation('STORE_GLOBAL_REG', 177, 'global:name, value:reg')) +def_operation(Operation('STORE_ATTR_REG', 178, 'owner:reg, attr:name, value:reg')) +def_operation(Operation('STORE_MAP_REG', 180, 'mapping:reg, key:reg, value:reg')) +def_operation(Operation('STORE_DEREF_REG', 187, 'value:reg, arg:free')) +def_operation(Operation('STORE_SUBSCR_REG', 191, 'container:reg, sub:reg, value:reg')) + +def_operation(Operation('UNARY_NOT_REG', 148, 'result:reg, arg:reg')) +def_operation(Operation('UNARY_NEGATIVE_REG', 168, 'result:reg, arg:reg')) +def_operation(Operation('UNARY_POSITIVE_REG', 203, 'result:reg, arg:reg')) + +def_operation(Operation('BINARY_ADD_REG', 152, 'result:reg, left:reg, right:reg')) +def_operation(Operation('BINARY_SUBTRACT_REG', 158, 'result:reg, left:reg, right:reg')) +def_operation(Operation('BINARY_MULTIPLY_REG', 156, 'result:reg, left:reg, right:reg')) +def_operation(Operation('BINARY_FLOOR_DIVIDE_REG', 166, 'result:reg, left:reg, right:reg')) +def_operation(Operation('BINARY_SUBSCR_REG', 164, 'result:reg, left:reg, right:reg')) +def_operation(Operation('BINARY_MODULO_REG', 190, 'result:reg, dividend:reg, divisor:reg')) +def_operation(Operation('BINARY_TRUE_DIVIDE_REG', 192, 'result:reg, left:reg, right:reg')) + -def_op('EXTENDED_ARG', 144) -EXTENDED_ARG = 144 +def_operation(Operation('INPLACE_ADD_REG', 173, 'inplace:reg, arg:reg')) +def_operation(Operation('INPLACE_SUBTRACT_REG', 175, 'inplace:reg, arg:reg')) +def_operation(Operation('INPLACE_MULTIPLY_REG', 174, 'inplace:reg, arg:reg')) +def_operation(Operation('INPLACE_FLOOR_DIVIDE_REG', 176, 'inplace:reg, arg:reg')) +def_operation(Operation('INPLACE_TRUE_DIVIDE_REG', 193, 'inplace:reg, arg:reg')) +def_operation(Operation('INPLACE_MODULO_REG', 194, 'inplace:reg, arg:reg')) + +def_operation(Operation('BUILD_SLICE2_REG', 169, 'result:reg, start:reg, stop:reg')) +def_operation(Operation('BUILD_SLICE3_REG', 205, 'result:reg, start:reg, stop:reg, step:reg')) +def_operation(Operation('BUILD_LIST_REG', 170, 'result:reg, length:nreg')) # + nreg x registers +def_operation(Operation('BUILD_TUPLE_REG', 172, 'result:reg, length:nreg')) # + nreg x registers +def_operation(Operation('BUILD_MAP_REG', 179, 'result:reg, length:imm')) + +def_operation(Operation('RETURN_VALUE_REG', 151, 'value:reg')) +def_operation(Operation('CALL_FUNCTION_REG', 162, 'result:reg, func:reg, narg:call_nreg')) # followed by arguments +def_operation(Operation('CALL_PROCEDURE_REG', 183, 'func:reg, narg:call_nreg')) # followed by arguments +def_operation(Operation('UNPACK_SEQUENCE_REG', 171, 'seq:reg, nvar:nreg')) # + nreg x registers +def_operation(Operation('UNPACK_EX_REG', 206, 'seq:reg, narg_before:nreg8, reg_list:reg, narg_after:nreg8')) -del def_op, name_op, jrel_op, jabs_op +def_operation(Operation('COMPARE_REG', 160, 'result:reg, arg:cmp, left:reg, right:reg')) +def_operation(Operation('JUMP_IF_TRUE_REG', 167, 'arg:reg, jump:jabs')) +def_operation(Operation('JUMP_IF_FALSE_REG', 161, 'arg:reg, jump:jabs')) + +def_operation(Operation('FOR_ITER_REG', 159, 'result:reg, iter:reg, jump:jrel')) +def_operation(Operation('GET_ITER_REG', 163, 'result:reg, arg:reg')) + +def_operation(Operation('MAKE_FUNCTION_REG', 181, 'result:reg, qualname:reg, code:reg, posdefaults:nreg8, kwdefaults:nkwreg8, num_annotations:nreg8')) +def_operation(Operation('MAKE_CLOSURE_REG', 185, 'result:reg, qualname:reg, code:reg, closure:reg, posdefaults:nreg8, kwdefaults:nkwreg8, num_annotations:nreg8')) +def_operation(Operation('LOAD_BUILD_CLASS_REG', 182, 'result:reg')) + +# unsorted +def_operation(Operation('DELETE_SUBSCR_REG', 188, 'container:reg, sub:reg')) +def_operation(Operation('LOAD_DEREF_REG', 189, 'result:reg, deref:free')) +def_operation(Operation('LOAD_NAME_REG', 195, 'result:reg, var:name')) +def_operation(Operation('LIST_APPEND_REG', 196, 'list:reg, value:reg')) +def_operation(Operation('MAP_ADD_REG', 204, 'mapping:reg, key: reg, value:reg')) +def_operation(Operation('STORE_NAME_REG', 197, 'var:name, value:reg')) +def_operation(Operation('IMPORT_NAME_REG', 198, 'result:reg, name:name, from:reg, level:reg')) +def_operation(Operation('IMPORT_FROM_REG', 199, 'result:reg, module:reg, name:name')) +def_operation(Operation('IMPORT_STAR_REG', 200, 'module:reg')) +def_operation(Operation('YIELD_REG', 201, 'value:reg')) +def_operation(Operation('STORE_LOCALS_REG', 202, 'locals:reg')) + +del def_op, name_op, jrel_op, jabs_op, def_operation diff -r fb80df16c4ff -r 6a9ca0177020 Lib/registervm.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/registervm.py Tue May 20 11:04:02 2014 +0200 @@ -0,0 +1,2551 @@ +import collections +import opcode +import struct +import types + +DEBUG_REGALLOC = False + +# Hardcoded number of registers in frameobject.h +FRAME_NREGISTER = 256 + +DONT_USE_STACK = { + 'NOP', 'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'JUMP_ABSOLUTE', + 'JUMP_FORWARD', 'DELETE_NAME', 'BREAK_LOOP', +} + +UNARY_REGISTER_TO_STACK = { + 'UNARY_NOT': 'UNARY_NOT_REG', + 'UNARY_POSITIVE': 'UNARY_POSITIVE_REG', + 'UNARY_NEGATIVE': 'UNARY_NEGATIVE_REG', +} + +BINARY_REGISTER_TO_STACK = { + 'BINARY_ADD': 'BINARY_ADD_REG', + 'BINARY_SUBTRACT': 'BINARY_SUBTRACT_REG', + 'BINARY_MULTIPLY': 'BINARY_MULTIPLY_REG', + 'BINARY_FLOOR_DIVIDE': 'BINARY_FLOOR_DIVIDE_REG', + 'BINARY_TRUE_DIVIDE': 'BINARY_TRUE_DIVIDE_REG', + 'BINARY_MODULO': 'BINARY_MODULO_REG', + 'BINARY_SUBSCR': 'BINARY_SUBSCR_REG', + + 'INPLACE_ADD': 'INPLACE_ADD_REG', + 'INPLACE_SUBTRACT': 'INPLACE_SUBTRACT_REG', + 'INPLACE_MULTIPLY': 'INPLACE_MULTIPLY_REG', + 'INPLACE_FLOOR_DIVIDE': 'INPLACE_FLOOR_DIVIDE_REG', + 'INPLACE_TRUE_DIVIDE': 'INPLACE_TRUE_DIVIDE_REG', + 'INPLACE_MODULO': 'INPLACE_MODULO_REG', +} + +# Replace a register value (don't modify the value inplace) +OPCODES_REPLACE_REG = { + 'LOAD_CONST_REG', 'LOAD_GLOBAL_REG', 'LOAD_ATTR_REG', 'LOAD_CLOSURE_REG', + + 'UNARY_NOT_REG', 'UNARY_POSITIVE_REG', 'UNARY_NEGATIVE_REG', + + 'MOVE_REG', + 'COMPARE_REG', + 'CALL_FUNCTION_REG', + 'FOR_ITER_REG', + 'BUILD_TUPLE_REG', 'BUILD_LIST_REG', 'BUILD_SLICE2_REG', 'BUILD_SLICE3_REG', 'BUILD_MAP_REG', + 'UNPACK_SEQUENCE_REG', + 'POP_REG', + 'MAKE_CLOSURE_REG', 'MAKE_FUNCTION_REG', + 'IMPORT_NAME_REG', 'IMPORT_FROM_REG', + 'GET_ITER_REG', + + 'BINARY_ADD_REG', + 'BINARY_SUBTRACT_REG', + 'BINARY_MULTIPLY_REG', + 'BINARY_FLOOR_DIVIDE_REG', + 'BINARY_TRUE_DIVIDE_REG', + 'BINARY_MODULO_REG', + 'BINARY_SUBSCR_REG', +} | set(UNARY_REGISTER_TO_STACK.values()) + +# Replace or modify (inplace) a register value +OPCODES_WRITE_INTO = OPCODES_REPLACE_REG | { + 'INPLACE_ADD_REG', + 'INPLACE_SUBTRACT_REG', + 'INPLACE_MULTIPLY_REG', + 'INPLACE_FLOOR_DIVIDE_REG', + 'INPLACE_TRUE_DIVIDE_REG', + 'INPLACE_MODULO_REG', +} + +LOAD_REG_OPS = { + 'LOAD_CONST_REG', 'LOAD_ATTR_REG', 'LOAD_GLOBAL_REG', +} + +LABEL = object() + +def patch_code_obj(code, bytecode=None, const=None): + """ + Usage: patch_code_obj(code, bytecode=new_bytecode) + Usage: patch_code_obj(code, const={0:new_const}) + """ + if bytecode is None: + bytecode = code.co_code + consts = code.co_consts + if const is not None: + consts = list(consts) + for index, value in const.items(): + consts[index] = value + consts = tuple(consts) + args = ( + code.co_argcount, + code.co_kwonlyargcount, + code.co_nlocals, + code.co_stacksize, + code.co_flags, + bytecode, + consts, + code.co_names, + code.co_varnames, + code.co_filename, + code.co_name, + code.co_firstlineno, + code.co_lnotab, + code.co_freevars, + code.co_cellvars, + ) + return types.CodeType(*args) + +def tag_loop(node): + node.block_type = 'loop start' + node.loop_start = node + seen = set() + _tag_loop(node, node, seen) + +def _tag_loop(node, loop_start, seen): + if node in seen: + return + node.loop_start = loop_start + seen.add(node) + for link in node.out_links: + if link.link_type == 'except StopIteration': + continue + _tag_loop(link.block, loop_start, seen) + +def detect_loops(node, seen): + if node in seen: + return + seen.add(node) + link = node.get_out_link('except StopIteration') + if link is not None: + link.block.block_type = 'end of loop #%s' % (1+node.index) + tag_loop(node) + for link in node.out_links: + detect_loops(link.block, seen) + +class RegisterTracker: + def __init__(self): + self.mapping = {} + self.reverse = collections.defaultdict(set) + # reg => (block, index) + self.clear_reg = {} + + def step(self, block, index, instr): + if instr.name == "CLEAR_REG": + reg = instr[0] + self.clear_reg[reg] = (block, index) + + def get(self, key): + reg = self.mapping[key] + if reg in self.clear_reg: + block, index = self.clear_reg[reg] + del self.clear_reg[reg] + instr = block[index] + block[index] = instr.copy('NOP') + return reg + + def __setitem__(self, key, reg): + self.mapping[key] = reg + self.reverse[reg].add(key) + + def __contains__(self, key): + return key in self.mapping + + def __repr__(self): + return '' % (self.mapping,) + +class Config: + def __init__(self): + # safe options: + self.merge_duplicate_load_const = True + self.remove_dead_code = True + self.quiet = False + self.debug = False + + # may be slower (in the current implementation) because the constant + # if maybe not used in a code path + self.move_load_const = False + + # Move LOAD_ATTR_REG and LOAD_GLOBAL_REG outside loops, can be slower + # if there are not used in a code path + self.move_load_attr = False + self.move_load_global = False + + self.merge_duplicate_load_attr = False + self.merge_duplicate_load_global = False + + def enable_unsafe_optimizations(self): + self.move_load_const = True + self.merge_duplicate_load_attr = True + self.merge_duplicate_load_global = True + + def enable_buggy_optimizations(self): + # Fix moving LOAD_ATTR_REG: only do that when calling methods + # + # result = Result() + # while 1: + # if result.done: + # break + # func(result) + self.move_load_attr = True + + # Don't move globals out of if. Only out of loops? + # subprocess.py: + # + # if mswindows: + # if p2cwrite != -1: + # p2cwrite = msvcrt.open_osfhandle(p2cwrite.Detach(), 0) + self.move_load_global = True + + def check(self): + if self.merge_duplicate_load_const and not self.move_load_const: + raise Exception("cannot merge LOAD_CONST_REG if there are not moved") + +class Argument: + def __init__(self, converter, value, arg_type, struct_format): + self.converter = converter + self.value = value + self.arg_type = arg_type + self.struct_format = struct_format + + def __hash__(self): + return hash((self.__class__, self.value)) + + def __eq__(self, other): + return self.__class__ == other.__class__ and self.value == other.value + + @staticmethod + def disassemble(converter, bytecode, offset, arg_type, extended_arg): + if arg_type == 'reg': + arg, offset = Register.disassemble(converter, bytecode, offset) + elif arg_type in ('nreg8', 'nkwreg8'): + arg, offset = Immediate8.disassemble(converter, bytecode, offset, arg_type) + else: + arg, offset = Immediate.disassemble(converter, bytecode, offset, arg_type) + arg.value += extended_arg + return arg, offset + + def __repr__(self): + return '<%s arg_type=%r value=%r>' % (self.__class__.__name__, self.arg_type, self.value) + + def format(self, pos=None): + raise NotImplementedError() + + def __str__(self): + return self.format() + + def get_size(self): + return struct.calcsize(self.struct_format) + + def compile(self): + try: + return struct.pack(self.struct_format, self.value) + except struct.error: + raise ValueError("invalid argument value: " + "arg=%r, value=%r, struct format=%r" + % (self, self.value, self.struct_format)) from None + + +class Immediate(Argument): + def __init__(self, converter, value, arg_type): + Argument.__init__(self, converter, value, arg_type, 'H') + + @staticmethod + def disassemble(converter, bytecode, offset, arg_type): + value = bytecode[offset] + bytecode[offset+1] * 256 + offset += 2 + arg = Immediate(converter, value, arg_type) + return arg, offset + + def format(self, pos=None): + arg = self.value + code = self.converter.code + if self.arg_type == "const": + return '%r (const#%s)' % (code.co_consts[arg], arg) + if self.arg_type == "name": + return '%r (name#%s)' % (code.co_names[arg], arg) + if self.arg_type == 'jrel': + if isinstance(arg, Label): + return '' % (arg.format(pos=pos),) + elif pos is not None: + return '' % (pos + arg, arg) + else: + return '' % arg + if self.arg_type == 'jabs': + if isinstance(arg, Label): + return '' % arg.format(pos=pos) + else: + return str(arg) + if self.arg_type == "cmp": + return repr(opcode.cmp_op[arg]) + if self.arg_type in ('nargs', 'call_nreg'): + # FIXME: split into two Immediate8 + na = arg & 0xff + nk = arg >> 8 + if nk: + return '(%d positional, %d keyword pair)' % (na, nk) + else: + return '(%d positional)' % (na,) + if self.arg_type == "free": + mapping = code.co_cellvars + code.co_freevars + return "%r (%s)" % (mapping[arg], arg) + if self.arg_type in ('nreg', 'imm'): + return str(arg) + raise ValueError("unknown immediate type: %s" % self.arg_type) + + def compile(self): + if isinstance(self.value, Label): + raise ValueError("labels are not compilable") + return Argument.compile(self) + +class Immediate8(Argument): + def __init__(self, converter, value, arg_type): + Argument.__init__(self, converter, value, arg_type, 'B') + + @staticmethod + def disassemble(converter, bytecode, offset, arg_type): + value = bytecode[offset] + offset += 1 + arg = Immediate8(converter, value, arg_type) + return arg, offset + + def format(self, pos=None): + return str(self.value) + +class Label(Argument): + def __init__(self, converter, value, arg_type, label_type): + # arg_type of the original argument + Argument.__init__(self, converter, value, arg_type, '') + self.label_type = label_type + + def get_size(self): + raise ValueError("labels are not compilable") + + def format(self, pos=None): + return str(self.value) + +class Register(Argument): + def __init__(self, converter, value): + Argument.__init__(self, converter, value, 'reg', 'H') + + @staticmethod + def disassemble(converter, bytecode, offset): + reg = bytecode[offset] + bytecode[offset+1] * 256 + offset += 2 + if reg < converter._first_register: + arg = Local(converter, reg - converter._first_local) + else: + arg = Register(converter, reg - converter._first_register) + return arg, offset + + def __repr__(self): + return '' % self.format() + + def format(self, pos=None): + return 'R%s' % self.value + + def compile(self): + value = self.converter._first_register + self.value + try: + return struct.pack(self.struct_format, value) + except struct.error: + raise ValueError("invalid register: value=%r" % (self.value,)) from None + +class Local(Argument): + def __init__(self, converter, value): + Argument.__init__(self, converter, value, 'local', 'H') + index = converter._first_local + self.value + self.name = converter.code.co_varnames[index] + + def __repr__(self): + return '' % (self.name,) + + def format(self, pos=None): + return repr(self.name) + + def compile(self): + value = self.converter._first_local + self.value + try: + return struct.pack(self.struct_format, value) + except struct.error: + raise ValueError("invalid local: value=%r" % (self.value,)) from None + +class Instruction: + def __init__(self, converter, code=None, name=None): + self.converter = converter + if name is not None: + self._name = name + self._code = opcode.opmap[name] + else: + self._code = code # int + self._name = opcode.opname[code] # str + self.arguments = [] + + def __getitem__(self, index): + return self.arguments[index] + + def __setitem__(self, index, arg): + self.arguments[index] = arg + + def copy(self, opcode_name, *arguments): + code = opcode.opmap[opcode_name] + instr = Instruction(self.converter, code) + instr.arguments = list(arguments) + return instr + + def real_copy(self): + instr = Instruction(self.converter, self._code) + instr.arguments = list(self.arguments) + return instr + + @property + def name(self): + return self._name + + def get_registers(self): + regs = [] + for arg in self: + if isinstance(arg, (Register, Local)): + regs.append(arg) + return regs + + def is_terminal(self): + """ + Is the instruction the last of a block? + """ + return self.name in ( + "RETURN_VALUE", "RETURN_VALUE_REG", "RAISE_VARARGS", + "JUMP_ABSOLUTE", "JUMP_FORWARD", "CONTINUE_LOOP", "BREAK_LOOP") + + def is_cond_jump(self): + return self.name in ( + "POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE", + "JUMP_IF_TRUE_OR_POP", "JUMP_IF_FALSE_OR_POP", + "JUMP_IF_TRUE_REG", "JUMP_IF_FALSE_REG") + + def _compile(self, only_size): + arguments = self.arguments + extended = None + for iarg, arg in enumerate(arguments): + if (isinstance(arg, Immediate) + and not isinstance(arg.value, Label) + and arg.value > 0xffff): + arguments = list(arguments) + if extended is not None: + raise ValueError("two arguments are extended") + extended = arg.value >> 16 + value = arg.value & 0xffff + arguments[iarg] = Immediate(arg.converter, value, arg.arg_type) + if extended is not None: + if only_size: + datalen = 3 + else: + data = struct.pack('=BH', opcode.EXTENDED_ARG, extended) + else: + if only_size: + datalen = 0 + else: + data = b'' + if only_size: + datalen += 1 + else: + data += struct.pack('B', self._code) + for arg in arguments: + if only_size: + datalen += arg.get_size() + else: + data += arg.compile() + if only_size: + return datalen + else: + return data + + def compile(self): + try: + return self._compile(False) + except ValueError as err: + raise ValueError("failed to compile %r: %s" % (self, err)) from None + + def get_size(self): + return self._compile(True) + + @staticmethod + def disassemble(converter, bytecode, offset, extended_arg): + code = bytecode[offset] + offset += 1 + + operation = opcode.OPERATION_BY_CODE[code] + arguments = [] + for arg_type in operation.arg_types: + arg, offset = Argument.disassemble(converter, bytecode, offset, arg_type, extended_arg) + arguments.append(arg) + + nreg = None + if arg_type == 'nreg': + nreg = arg.value + elif arg_type == 'call_nreg': + arg = arg.value + na = arg & 0xff + nk = arg >> 8 + nreg = na + 2 * nk + elif arg_type == 'nreg8': + nreg = arg.value + elif arg_type == 'nkwreg8': + nreg = arg.value * 2 + if nreg is not None: + for reg in range(nreg): + arg, offset = Register.disassemble(converter, bytecode, offset) + arguments.append(arg) + + instr = Instruction(converter, code) + instr.arguments.extend(arguments) + return instr, offset + + def format(self, pos=None, with_name=False): + text = self._name + args = [] + descr = opcode.OPERATION_BY_CODE[self._code] + arg_names = descr.arg_names + ireg = 0 + unamed = 0 + for arg in self.arguments: + arg_name = None + if unamed: + unamed -= 1 + elif ireg < len(arg_names): + arg_name = arg_names[ireg] + ireg += 1 + arg_str = arg.format(pos=pos) + args.append((arg_name, arg_str)) + if arg.arg_type == 'nreg8': + unamed = arg.value + if args: + text_args = [] + for name, value in args: + if with_name and name: + text_args.append("%s=%s" % (name, value)) + else: + text_args.append(value) + text += ' ' + ', '.join(text_args) + return text + + def use_stack(self): + if self.name in DONT_USE_STACK: + return False + descr = opcode.OPERATION_BY_CODE[self._code] + for arg_type in descr.arg_types: + if arg_type == 'reg': + return False + return True + + def _is_reg_modified(self, reg, operation_set): + if self.name == 'UNPACK_SEQUENCE_REG': + for index, arg in enumerate(self.arguments[2:]): + if arg == reg: + return 2+index, True + elif self.name in operation_set: + if self.arguments[0] == reg: + return 0, True + return False, None + + def is_reg_replaced(self, reg): + """ + Check if the value of the specified register is replaced + """ + return self._is_reg_modified(reg, OPCODES_REPLACE_REG)[1] + + def is_reg_modified(self, reg): + """ + Check if the specified register is modified in-place or if its value is + replaced + """ + return self._is_reg_modified(reg, OPCODES_WRITE_INTO)[1] + + def index_modified_reg(self, reg): + index = self._is_reg_modified(reg, OPCODES_WRITE_INTO)[0] + if index is None: + raise ValueError("the register is not modified") + return index + + def is_reg_used(self, reg): + return any( + isinstance(arg, (Register, Local)) and arg == reg + for arg in self.arguments) + + def __str__(self): + return self.format() + + def __repr__(self): + return '<%s>' % self.__str__() + +def disassemble(obj): + code = obj.code.co_code + instructions = [] + n = len(code) + i = 0 + extended_arg = 0 + while i < n: + instr, i = Instruction.disassemble(obj, code, i, extended_arg) + if instr.name == 'EXTENDED_ARG': + extended_arg = instr[-1].value * 65536 + else: + instructions.append(instr) + extended_arg = 0 + return instructions + + +class StackItem: + def __init__(self, instr_block, instr_index, arg): + self.instr_block = instr_block + self.instr_index = instr_index + self.clear_block = None + self.clear_index = None + self.arg = arg + + def copy(self): + item = StackItem(self.instr_block, self.instr_index, self.arg) + item.clear_block = self.clear_block + item.clear_index = self.clear_index + return item + + def __repr__(self): + return repr(self.arg) + +class Stack: + def __init__(self): + self._stack = [] + self.has_yield_result = False + + def copy(self): + stack = Stack() + stack._stack = [item.copy() for item in self._stack] + stack.has_yield_result = self.has_yield_result + return stack + + def __len__(self): + return len(self._stack) + + def __repr__(self): + return '' % self._stack + + def clear(self): + del self._stack[:] + + def push(self, block, index, arg): + if not isinstance(arg, (Local, Register)): + raise TypeError("unsupported stack value: %s" % arg) + item = StackItem(block, index, arg) + self._stack.append(item) + + def clear_reg(self, block, index, arg): + for item in reversed(self._stack): + if item.arg != arg: + continue + item.clear_block = block + item.clear_index = index + break + + def pop(self): + del self._stack[-1] + + def peek(self, index): + if not(0 <= (index-1) <= len(self)): + raise IndexError("index out of the stack: %s (stack length: %s)" + % (index, len(self))) + return self._stack[-index+1].arg + + def read(self, nreg): + if not nreg: + return [], [] + if len(self._stack) < nreg or self.has_yield_result: + raise ValueError("stack contains less than %s registers" % nreg) + regs = [] + clear = [] + for item in self._stack[-nreg:]: + block = item.instr_block + index = item.instr_index + block[index] = block[index].copy('NOP') + if item.clear_block is not None: + block = item.clear_block + index = item.clear_index + block[index] = block[index].copy('NOP') + clear.append(item.arg) + regs.append(item.arg) + del self._stack[-nreg:] + return regs, clear + + def clear_instrs(self, regs, instr): + if not regs: + return () + return tuple( + instr.copy('CLEAR_REG', reg) + for reg in regs) + +class Link: + def __init__(self, link_type, block): + self.link_type = link_type + self.block = block + + def __repr__(self): + return '' % (self.link_type, self.block) + + +class Block: + def __init__(self, converter, instructions, block_index): + self.converter = converter + self.instructions = instructions + self.code = converter.code + self.config = converter.config + # FIXME: don't rely on the block index? + self.index = block_index + self.block_type = 'unknown' + # if the block is part of a loop: loop_start is the first block + # of the loop, None otherwise + self.loop_start = None + + self.in_links = [] + self.out_links = [] + + # FIXME: remove this deprecated property? + @property + def in_loop(self): + return (self.loop_start is not None) + + def add_link(self, link): + self.out_links.append(link) + reverse_link = Link(link.link_type, self) + dest_block = link.block + dest_block.in_links.append(reverse_link) + + def _get_link(self, links, link_type): + for link in links: + if link.link_type == link_type: + return link + return None + + def get_in_link(self, link_type): + return self._get_link(self.in_links, link_type) + + def get_out_link(self, link_type): + return self._get_link(self.out_links, link_type) + + def unref_block(self, removed_block): + self.in_links = [ + link for link in self.in_links + if link.block != removed_block] + self.out_links = [ + link for link in self.out_links + if link.block != removed_block] + + def __str__(self): + text = 'block #%s %s' % (self.index+1, self.block_type) + if self.loop_start is not None: + text += ' (in loop #%s)' % (1 + self.loop_start.index) + return text + + def __repr__(self): + return '' % (self.index+1, self.block_type) + + def __len__(self): + return len(self.instructions) + + def __getitem__(self, index): + return self.instructions[index] + + def __setitem__(self, index, new_instr): + self.instructions[index] = new_instr + + def __delitem__(self, index): + del self.instructions[index] + + def get_size(self): + return sum(instr.get_size() for instr in self) + + def dump(self, prefix=None, with_name=False, pos=0): + if prefix: + print("=== %s ===" % prefix) + for instr in self.instructions: + size = instr.get_size() + print("%3i: %s" % (pos, instr.format(pos+size, with_name=with_name))) + pos += size + return pos + + def iter_instr(self, start=None, end=None, backward=False, + # stop before entering a try or except block + stop_on_try=False, + # ignore CLEAR_REG instructions + ignore_clear=False, + # FIXME: remove unused stop_at + # stop at the specified index of the current block + stop_at=None, + # if we started in the middle of a loop, don't restart from + # the beginning of the loop + loop=True, + # ignore conditional branches and set loop=False + move_instr=False, + # stop at the end of the current loop + only_loop=False): + if only_loop and not self.in_loop: + raise ValueError("%s is not part of a loop" % self) + seen = set() + if stop_at is not None: + seen.add((self, stop_at, self[stop_at])) + # FIXME: implement only_loop + for block in self.converter.iter_blocks(self, backward, stop_on_try, move_instr, loop=loop): + if block is self: + instr_it = self._iter_instr(start=start, end=end, backward=backward) + start = None + end = None + else: + instr_it = block._iter_instr(backward=backward) + for index, instr in instr_it: + key = (block, index, instr) + if key in seen: + break + seen.add(key) + if not ignore_clear or instr.name != "CLEAR_REG": + yield block, index, instr + + def get_block_after_loop(self): + if not self.in_loop: + raise ValueError("%s is not part of a loop" % self) + start = self.loop_start + for link in start.out_links: + if link.link_type == 'except StopIteration': + return link.block + raise ValueError("cannot find the block after block %s" % self) + + def _iter_instr(self, start=None, end=None, backward=False): + if start is None: + start = 0 + if end is None: + end = len(self) + if not backward: + it_slice = range(start, end, 1) + else: + it_slice = range(end-1, start-1, -1) + for index in it_slice: + instr = self[index] + yield index, instr + + def is_reg_replaced(self, reg, start=0, end=None, **options): + for block, index, instr in self.iter_instr(start, end, **options): + if instr.is_reg_replaced(reg): + return True + return False + + return any( + instr.is_reg_replaced(reg) + for block, index, instr in self.iter_instr(start, end)) + + def is_reg_modified(self, reg, start=0, end=None, **options): + return any( + instr.is_reg_modified(reg) + for block, index, instr in self.iter_instr(start, end, **options)) + + def is_reg_used(self, reg, start=0, end=None, **options): + return any( + instr.is_reg_used(reg) + for block, index, instr in self.iter_instr(start, end, **options)) + + def replace_register(self, first_block, index, oldreg, newreg): + # Replace R2 with R1: + # + # MOVE_REG R2, R1 + # CLEAR_REG R2 + # BINARY_ADD_REG total, R2, R3 + # CLEAR_REG R2 + # => + # MOVE_REG R1, R1 + # NOP + # BINARY_ADD_REG total, R1, R3 + # CLEAR_REG R1 + # + # Keep the last CLEAR_REG, remove others + previous_clear_reg = None + instr_it = first_block.iter_instr(index) + for block, index, instr in instr_it: + if instr.name == "CLEAR_REG": + if not isinstance(newreg, Local): + # newreg is a register + instr.arguments = [ + newreg if isinstance(arg, (Register, Local)) and arg == oldreg else arg + for arg in instr.arguments] + if instr[0] == newreg: + if previous_clear_reg is not None: + prev_block, prev_index = previous_clear_reg + prev_block[prev_index] = prev_block[prev_index].copy("NOP") + previous_clear_reg = (block, index) + else: + # newreg is a local + if instr[0] == oldreg: + block[index] = instr.copy("NOP") + else: + instr.arguments = [ + newreg if isinstance(arg, (Register, Local)) and arg == oldreg else arg + for arg in instr.arguments] + + def replace_with_local(self, first_block, index, oldreg, newreg): + # Replace R2 with 'x': + # + # MOVE_REG R2, R1 + # CLEAR_REG R2 + # BINARY_ADD_REG R2, 'total', R3 + # CLEAR_REG R3 + # CLEAR_REG R2 + # => + # MOVE_REG 'x', R1 + # NOP + # BINARY_ADD_REG 'x', 'total', R3 + # CLEAR_REG R3 + # NOP + for block, index, instr in first_block.iter_instr(index, loop=False): + if instr.name == "CLEAR_REG": + if instr[0] == oldreg: + block[index] = instr.copy("NOP") + else: + instr.arguments = [ + newreg if isinstance(arg, (Register, Local)) and arg == oldreg else arg + for arg in instr.arguments] + + def new_register(self): + return self.converter.new_register() + + def convert_step1(self): + self._patch(self.step1) + + def convert_with_stack(self, stack): + self.stack = stack + self._patch_stack(self.step_with_stack) + + def convert_move_instr(self): + self._patch(self.step_move_instr) + + def _patch(self, step): + index = 0 + while index < len(self): + instr = self[index] + diff = step(instr, index) + if diff is None: + index += 1 + else: + index = max(index+diff, 0) + + def _patch_stack(self, step): + index = 0 + while index < len(self): + instr = self[index] + new_index = step(instr, index) + if new_index is not None: + index = new_index + else: + index += 1 + + def step1(self, instr, index): + # Rewrite stack based bytecode to register-based bytecode + if instr.name == 'LOAD_FAST': + instr = instr.copy('PUSH_REG', instr[0]) + self[index] = instr + return 1 + + if instr.name == 'DUP_TOP': + reg = self.new_register() + self[index:index+1] = ( + instr.copy('POP_REG', reg), + instr.copy('PUSH_REG', reg), + instr.copy('PUSH_REG', reg), + instr.copy('CLEAR_REG', reg), + ) + return 3 + + if instr.name == 'LOAD_CONST': + const = instr[0] + reg = self.new_register() + self[index:index+1] = ( + instr.copy('LOAD_CONST_REG', reg, instr[0]), + instr.copy('PUSH_REG', reg), + instr.copy('CLEAR_REG', reg), + ) + return 2 + + if instr.name == 'LOAD_GLOBAL': + result = self.new_register() + self[index:index+1] = ( + instr.copy('LOAD_GLOBAL_REG', result, instr[0]), + instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result), + ) + return 2 + + if instr.name == 'LOAD_DEREF': + reg = self.new_register() + self[index:index+1] = ( + instr.copy('LOAD_DEREF_REG', reg, instr[0]), + instr.copy('PUSH_REG', reg), + instr.copy('CLEAR_REG', reg), + ) + return 2 + + if instr.name == 'STORE_FAST': + instr = instr.copy('POP_REG', instr[0]) + self[index] = instr + return 1 + + if instr.name == 'STORE_GLOBAL': + reg = self.new_register() + self[index:index+1] = ( + instr.copy('POP_REG', reg), + instr.copy('STORE_GLOBAL_REG', instr[0], reg), + instr.copy('CLEAR_REG', reg), + ) + return 2 + + if instr.name == 'STORE_NAME': + reg = self.new_register() + self[index:index+1] = ( + instr.copy('POP_REG', reg), + instr.copy('STORE_NAME_REG', instr[0], reg), + instr.copy('CLEAR_REG', reg), + ) + return 2 + + if instr.name == 'STORE_DEREF': + reg = self.new_register() + self[index:index+1] = ( + instr.copy('POP_REG', reg), + instr.copy('STORE_DEREF_REG', reg, instr[0]), + instr.copy('CLEAR_REG', reg), + ) + return 2 + + if instr.name == 'BUILD_MAP': + reg = self.new_register() + self[index:index+1] = ( + instr.copy('BUILD_MAP_REG', reg, instr[0]), + instr.copy('PUSH_REG', reg), + instr.copy('CLEAR_REG', reg), + ) + return 2 + + if instr.name == 'LOAD_BUILD_CLASS': + reg = self.new_register() + self[index:index+1] = ( + instr.copy('LOAD_BUILD_CLASS_REG', reg), + instr.copy('PUSH_REG', reg), + instr.copy('CLEAR_REG', reg), + ) + return 2 + + if instr.name in ('LOAD_CLOSURE', 'LOAD_NAME'): + reg = self.new_register() + if instr.name == 'LOAD_NAME': + new_name = 'LOAD_NAME_REG' + else: + new_name = 'LOAD_CLOSURE_REG' + self[index:index+1] = ( + instr.copy(new_name, reg, instr[0]), + instr.copy('PUSH_REG', reg), + instr.copy('CLEAR_REG', reg), + ) + return 2 + + if instr.name == 'ROT_TWO': + reg1 = self.new_register() + reg2 = self.new_register() + self[index:index+1] = ( + instr.copy('POP_REG', reg1), # top + instr.copy('POP_REG', reg2), # second + instr.copy('PUSH_REG', reg1), # second + instr.copy('PUSH_REG', reg2), # top + instr.copy('CLEAR_REG', reg1), + instr.copy('CLEAR_REG', reg2), + ) + return 4 + + if instr.name == 'ROT_THREE': + reg1 = self.new_register() + reg2 = self.new_register() + reg3 = self.new_register() + self[index:index+1] = ( + instr.copy('POP_REG', reg1), # top + instr.copy('POP_REG', reg2), # second + instr.copy('POP_REG', reg3), # third + instr.copy('PUSH_REG', reg1), # third + instr.copy('PUSH_REG', reg3), # second + instr.copy('PUSH_REG', reg2), # top + instr.copy('CLEAR_REG', reg1), + instr.copy('CLEAR_REG', reg2), + instr.copy('CLEAR_REG', reg3), + ) + return 4 + + if instr.name in UNARY_REGISTER_TO_STACK: + reg1 = self.new_register() + reg2 = self.new_register() + new_opcode = UNARY_REGISTER_TO_STACK[instr.name] + self[index:index+1] = ( + instr.copy('POP_REG', reg1), + instr.copy(new_opcode, reg2, reg1), + instr.copy('CLEAR_REG', reg1), + instr.copy('PUSH_REG', reg2), + instr.copy('CLEAR_REG', reg2), + ) + return index + + if instr.name == 'GET_ITER': + reg1 = self.new_register() + result = self.new_register() + self[index:index+1] = ( + instr.copy('POP_REG', reg1), + instr.copy('GET_ITER_REG', result, reg1), + instr.copy('CLEAR_REG', reg1), + instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result), + ) + return index + + def step_with_stack(self, instr, index): + if (instr.name in BINARY_REGISTER_TO_STACK + and len(self.stack) >= 2): + # PUSH_reg left + # PUSH_reg right + # BINARY_ADD + # => + # BINARY_ADD_REG reg3, left, right + # PUSH_REG reg3 + stack, clear = self.stack.read(2) + left = stack[0] + right = stack[1] + new_opcode = BINARY_REGISTER_TO_STACK[instr.name] + if not new_opcode.startswith('INPLACE_'): + result = self.new_register() + new_instr = instr.copy(new_opcode, result, left, right) + self[index:index+1] = ( + (new_instr,) + + self.stack.clear_instrs(clear, instr) + + (instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result)) + ) + else: + result = left + new_instr = instr.copy(new_opcode, left, right) + self[index:index+1] = ( + (new_instr,) + + (instr.copy('PUSH_REG', result),) + + self.stack.clear_instrs(clear, instr) + ) + return index + + if (instr.name == 'FOR_ITER' + and len(self.stack) >= 1): + stack, clear = self.stack.read(1) + # PUSH_REG iterator + # FOR_ITER label + # => + # FOR_ITER_REG value, iterator, label + # PUSH_REG value + # CLEAR_REG value + # ... + # CLEAR_REG iterator + done_link = self.get_out_link('except StopIteration') + if done_link is None: + raise ValueError("%s has no StopIteration block" % self) + iterator = stack[0] + result = self.new_register() + self[index:index+1] = ( + instr.copy('FOR_ITER_REG', result, iterator, instr[0]), + instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result), + ) + clear_instrs = self.stack.clear_instrs(clear, instr) + if clear_instrs: + done_link.block[0:0] = clear_instrs + return index + + if (instr.name == 'LOAD_ATTR' + and len(self.stack) >= 1): + # PUSH_REG reg1 + # LOAD_ATTR name + # => + # LOAD_ATTR_REG reg0, reg1, name + # PUSH_REG reg0 + stack, clear = self.stack.read(1) + attr = instr[0] + reg_owner = stack[0] + result = self.new_register() + self[index:index+1] = ( + (instr.copy('LOAD_ATTR_REG', result, reg_owner, instr[0]),) + + self.stack.clear_instrs(clear, instr) + + (instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result),) + ) + return index + + if (instr.name == "POP_TOP" + and len(self.stack) >= 1): + # PUSH_REG reg + # POP_TOP + # => + # CLEAR_REG reg + stack, clear = self.stack.read(1) + reg = stack[0] + self[index:index+1] = ( + instr.copy('CLEAR_REG', reg), + ) + self.stack.clear_instrs(clear, instr) + return index + + if (instr.name == "YIELD_VALUE" + and len(self.stack) >= 1): + stack, clear = self.stack.read(1) + reg = stack[0] + result = self.new_register() + self[index:index+1] = ( + (instr.copy('YIELD_REG', reg),) + + self.stack.clear_instrs(clear, instr) + + (instr.copy('POP_REG', result), + instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result),) + ) + return index + + if (instr.name in ("JUMP_IF_TRUE_OR_POP", "JUMP_IF_FALSE_OR_POP") + and len(self.stack) >= 1): + # PUSH_REG reg + # JUMP_IF_FALSE_OR_POP label + # => + # JUMP_IF_FALSE_REG reg, label + stack, clear = self.stack.read(1) + # FIXME: factorize with ('POP_JUMP_IF_FALSE', 'POP_JUMP_IF_TRUE') + if index != len(self)-1: + raise ValueError("%s is not the last instruction of %s" % (instr, self)) + else_link = self.get_out_link('jump else') + if else_link is None: + raise ValueError("%s is not linked to an else block" % self) + + reg = stack[0] + if instr.name == "JUMP_IF_TRUE_OR_POP": + new_name = 'JUMP_IF_TRUE_REG' + else: + new_name = 'JUMP_IF_FALSE_REG' + self[index:index+1] = ( + instr.copy(new_name, reg, instr[0]), + ) + clear_instrs = self.stack.clear_instrs(clear, instr) + if clear_instrs: + else_link.block[0:0] = clear_instrs + return index + + if (instr.name in ('POP_JUMP_IF_FALSE', 'POP_JUMP_IF_TRUE') + and len(self.stack) >= 1): + stack, clear = self.stack.read(1) + # FIXME: factorize with ("JUMP_IF_TRUE_OR_POP", "JUMP_IF_FALSE_OR_POP") + if index != len(self)-1: + raise ValueError("%s is not the last instruction of %s" % (instr, self)) + links = self.out_links + if (len(links) != 2 + or set(link.link_type for link in links) != {'jump if', 'jump else'}): + raise ValueError("%s has invalid links for %s: %s" % (self, instr, links)) + + # PUSH_REG reg + # POP_JUMP_IF_FALSE cmp + # => + # JUMP_IF_FALSE_REG reg, cmp + reg = stack[0] + if instr.name == 'POP_JUMP_IF_TRUE': + new_opcode = 'JUMP_IF_TRUE_REG' + else: + new_opcode = 'JUMP_IF_FALSE_REG' + jump_if = instr.copy(new_opcode, reg, instr[0]) + self[index:index+1] = ( + jump_if, + ) + clear_instrs = self.stack.clear_instrs(clear, instr) + if clear_instrs: + for link in links: + link.block[0:0] = [instr.real_copy() for instr in clear_instrs] + return index + + + if (instr.name == 'STORE_ATTR' + and len(self.stack) >= 2): + # PUSH_REG reg_obj + # PUSH_REG reg_value + # STORE_ATTR name + # => + # STORE_ATTR_REG reg_obj, name, reg_value + stack, clear = self.stack.read(2) + owner = stack[1] + attr = instr[0] + value = stack[0] + self[index:index+1] = ( + instr.copy('STORE_ATTR_REG', owner, attr, value), + ) + self.stack.clear_instrs(clear, instr) + return index + + if (instr.name == "STORE_LOCALS" + and len(self.stack) >= 1): + stack, clear = self.stack.read(1) + self[index:index+1] = ( + instr.copy('STORE_LOCALS_REG', stack[0]), + ) + self.stack.clear_instrs(clear, instr) + return index + + if (instr.name == 'COMPARE_OP' + and len(self.stack) >= 2): + # PUSH_REG reg1 + # PUSH_REG reg2 + # COMPARE_OP cmp + # => + # COMPARE_REG reg3, reg2, reg1 + # PUSH_REG reg3 + stack, clear = self.stack.read(2) + reg1 = stack[0] + reg2 = stack[1] + result = self.new_register() + self[index:index+1] = ( + (instr.copy('COMPARE_REG', result, instr[0], reg1, reg2),) + + self.stack.clear_instrs(clear, instr) + + (instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result),) + ) + return index+1 + + if instr.name == 'CALL_FUNCTION': + # PUSH_REG reg_func + # PUSH_REG reg_arg1 + # PUSH_REG reg_arg2 + # ... + # CALL_FUNCTION narg + # => + # CALL_FUNCTION_REG res_result, reg_func, narg, reg_arg1, reg_arg2, ... + # PUSH_REG res_result + n = instr[0].value + na = n & 0xff + nk = n >> 8 + narg = 1 + na + 2 * nk + if len(self.stack) >= narg: + stack, clear = self.stack.read(narg) + func = stack[0] + regs = stack[1:] + narg_arg = Immediate(self, instr[0].value, 'call_nreg') + if (index+1 < len(self) + and self[index+1].name == 'POP_TOP'): + # PUSH_REG reg1 + # PUSH_REG reg2 + # ... + # CALL_FUNCTION n + # POP_TOP + # => + # CALL_PROCEDURE_REG func, reg2, ..., regn + # PUSH_REG reg0 + self[index:index+2] = ( + instr.copy('CALL_PROCEDURE_REG', func, narg_arg, *regs), + ) + self.stack.clear_instrs(clear, instr) + else: + result = self.new_register() + self[index:index+1] = ( + (instr.copy('CALL_FUNCTION_REG', result, func, narg_arg, *regs),) + + self.stack.clear_instrs(clear, instr) + + (instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result),) + ) + return index+1 + + if (instr.name == 'LIST_APPEND' + and len(self.stack) >= 2 + and instr[0].value-2 < len(self.stack)-1): + # PUSH_REG reg_list + # ... + # PUSH_REG reg_value + # LIST_APPEND 2 + # => + # PUSH_REG reg_list + # ... + # LIST_APPEND_REG reg_list, reg_value + stack, clear = self.stack.read(1) + listreg = self.stack.peek(instr[0].value) + self[index:index+1] = ( + instr.copy('LIST_APPEND_REG', listreg, stack[0]), + ) + self.stack.clear_instrs(clear, instr) + return index + 1 + + if (instr.name == 'MAP_ADD' + and len(self.stack) >= 2 + and (instr[0].value-2) < len(self.stack)-2): + # PUSH_REG reg_mapping + # ... + # PUSH_REG reg_key + # PUSH_REG reg_value + # MAP_ADD 2 + # => + # PUSH_REG reg_mapping + # ... + # MAP_ADD_REG reg_mapping, reg_key, reg_value + + # [map, key, value] pop; pop; peek(2) + stack, clear = self.stack.read(2) + reg_mapping = self.stack.peek(instr[0].value) + reg_value = stack[0] + reg_key = stack[1] + self[index:index+1] = ( + instr.copy('MAP_ADD_REG', reg_mapping, reg_key, reg_value), + ) + self.stack.clear_instrs(clear, instr) + return index + 1 + + if (instr.name == 'RETURN_VALUE' + and len(self.stack) >= 1): + # PUSH_REG reg + # RETURN_VALUE + # => + # RETURN_VALUE_REG reg + stack, clear = self.stack.read(1) + self[index:index+1] = ( + instr.copy('RETURN_VALUE_REG', stack[0]), + ) + self.stack.clear_instrs(clear, instr) + return index+1 + + if instr.name == 'POP_REG': + if self.stack.has_yield_result: + self.stack.has_yield_result = False + if len(self.stack) >= 1: + stack, clear = self.stack.read(1) + reg1 = stack[0] + reg2 = instr[0] + if reg1 != reg2: + # PUSH_REG reg + # POP_REG name + # => + # MOVE_REG name, reg + self[index:index+1] = ( + instr.copy('MOVE_REG', reg2, reg1), + ) + self.stack.clear_instrs(clear, instr) + return index + else: + # PUSH_REG reg + # POP_REG reg + # => + # (remove useless instructions) + del self[index] + return index + + if (instr.name == 'UNPACK_SEQUENCE' + and len(self.stack) >= 1): + # PUSH_REG reg_sequence + # UNPACK_SEQUENCE 2 + # POP_REG reg_x + # POP_REG reg_y + # => + # UNPACK_SEQUENCE_REG reg_sequence, 2, reg_x, reg_y + stack, clear = self.stack.read(1) + arg = instr[0] + regs = [self.new_register() for ireg in range(arg.value)] + sequence = stack[0] + instrs = [ + instr.copy('UNPACK_SEQUENCE_REG', sequence, arg, *regs), + ] + instrs.extend(self.stack.clear_instrs(clear, instr)) + for reg in reversed(regs): + instrs.append(instr.copy('PUSH_REG', reg)) + instrs.append(instr.copy('CLEAR_REG', reg)) + self[index:index+1] = instrs + return index+1 + + if (instr.name == 'UNPACK_EX' + and len(self.stack) >= 1): + stack, clear = self.stack.read(1) + sequence = stack[0] + args = [sequence] + arg = instr[0].value + + nreg_before = arg & 0xff + args.append(Immediate8(self, nreg_before, 'nreg8')) + regs = [self.new_register() for ireg in range(nreg_before)] + args.extend(regs) + + reg_list = self.new_register() + regs.append(reg_list) + args.append(reg_list) + + nreg_after = arg >> 8 + args.append(Immediate8(self, nreg_after, 'nreg8')) + regs_after = [self.new_register() for ireg in range(nreg_after)] + regs.extend(regs_after) + args.extend(regs_after) + + instrs = [instr.copy('UNPACK_EX_REG', *args)] + instrs.extend(self.stack.clear_instrs(clear, instr)) + for reg in reversed(regs): + instrs.append(instr.copy('PUSH_REG', reg)) + instrs.append(instr.copy('CLEAR_REG', reg)) + self[index:index+1] = instrs + return index+1 + + if instr.name in ('MAKE_FUNCTION', 'MAKE_CLOSURE'): + # PUSH_REG reg_code + # PUSH_REG reg_qualname + # MAKE_FUNCTION + # => + # MAKE_FUNCTION reg_result, reg_qualname, reg_code + # PUSH_REG reg_result + # + # or + # + # PUSH_REG reg_closure + # PUSH_REG reg_code + # PUSH_REG reg_qualname + # MAKE_FUNCTION + # => + # MAKE_CLOSURE reg_result, reg_qualname, reg_code, reg_closure + # PUSH_REG reg_result + arg = instr[0].value + posdefaults = arg & 0xff + kwdefaults = (arg >> 8) & 0xff + num_annotations = (arg >> 16) & 0x7fff + + make_closure = (instr.name == 'MAKE_CLOSURE') + if make_closure: + mandatory_nreg = 3 + else: + mandatory_nreg = 2 + nreg = mandatory_nreg + posdefaults + kwdefaults * 2 + num_annotations + if len(self.stack) >= nreg: + stack, clear = self.stack.read(nreg) + stack = stack[::-1] + result = self.new_register() + + args = [result] + args.extend(stack[:mandatory_nreg]) + sp = mandatory_nreg + + args.append(Immediate8(self, posdefaults, 'nreg8')) + args.extend(stack[sp:sp+posdefaults]) + sp += posdefaults + + args.append(Immediate8(self, kwdefaults, 'nreg8')) + args.extend(stack[sp:sp+kwdefaults * 2]) + sp += kwdefaults * 2 + + args.append(Immediate8(self, num_annotations, 'nreg8')) + args.extend(stack[sp:sp+num_annotations]) + + if make_closure: + call_instr = instr.copy('MAKE_CLOSURE_REG', *args) + else: + call_instr = instr.copy('MAKE_FUNCTION_REG', *args) + self[index:index+1] = ( + (call_instr,) + + self.stack.clear_instrs(clear, instr) + + (instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result),) + ) + return index+1 + + if (instr.name == 'BUILD_SLICE' + and instr[0].value in (2, 3)): + # PUSH_REG reg1 + # PUSH_REG reg2 + # BUILD_SLICE 2 + # => + # BUILD_SLICE2 reg3, reg1, reg2 + # PUSH_REG reg3 + nreg = instr[0].value + if len(self.stack) >= nreg: + stack, clear = self.stack.read(nreg) + result = self.new_register() + if nreg == 3: + new_name = 'BUILD_SLICE3_REG' + else: + new_name = 'BUILD_SLICE2_REG' + self[index:index+1] = ( + (instr.copy(new_name, result, *stack),) + + self.stack.clear_instrs(clear, instr) + + (instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result),) + ) + return index+1 + + if instr.name in ('BUILD_TUPLE', 'BUILD_LIST'): + # PUSH_REG reg1 + # PUSH_REG reg2 + # BUILD_LIST 2 + # => + # BUILD_LIST_REG 3, reg0, reg1, reg2 + # PUSH_REG reg0 + nreg = instr[0].value + if len(self.stack) >= nreg: + stack, clear = self.stack.read(nreg) + result = self.new_register() + if instr.name == 'BUILD_TUPLE': + new_opcode = 'BUILD_TUPLE_REG' + else: + new_opcode = 'BUILD_LIST_REG' + length = Immediate(self, nreg, 'nreg') + build_instr = instr.copy(new_opcode, result, length, *stack) + self[index:index+1] = ( + (build_instr,) + + self.stack.clear_instrs(clear, instr) + + (instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result),) + ) + return index+1 + + if (instr.name == 'STORE_MAP' + and len(self.stack) >= 3): + # PUSH_REG reg_map + # PUSH_REG reg_value + # PUSH_REG reg_key + # STORE_MAP + # => + # STORE_MAP_REG reg_map, reg_key, reg_value + # PUSH_REG reg_map + stack, clear = self.stack.read(3) + reg_key = stack[2] + reg_value = stack[1] + reg_map = stack[0] + self[index:index+1] = ( + (instr.copy('STORE_MAP_REG', reg_map, reg_key, reg_value),) + + self.stack.clear_instrs(clear, instr) + + (instr.copy('PUSH_REG', reg_map),) + ) + return index+1 + + if (instr.name == 'STORE_SUBSCR' + and len(self.stack) >= 3): + # PUSH_REG reg_sub + # PUSH_REG reg_container + # PUSH_REG reg_v + # STORE_SUBSCR + # => + # STORE_SUBSCR reg_container, reg_sub, reg_value + stack, clear = self.stack.read(3) + reg_sub = stack[2] + container = stack[1] + value = stack[0] + self[index:index+1] = ( + instr.copy('STORE_SUBSCR_REG', container, reg_sub, value), + ) + self.stack.clear_instrs(clear, instr) + return index+1 + + if (instr.name == 'DELETE_SUBSCR' + and len(self.stack) >= 2): + # PUSH_REG reg_container + # PUSH_REG reg_sub + # DELETE_SUBSCR + # => + # DELETE_SUBSCR_REG reg_container, reg_sub + stack, clear = self.stack.read(2) + reg_container = stack[0] + reg_sub = stack[1] + self[index:index+1] = ( + instr.copy('DELETE_SUBSCR_REG', reg_container, reg_sub), + ) + self.stack.clear_instrs(clear, instr) + return index + + if (instr.name == 'IMPORT_NAME' + and len(self.stack) >= 2): + # PUSH_REG reg_level + # PUSH_REG reg_from + # IMPORT_NAME name + # => + # IMPORT_NAME_REG reg_result, name, reg_from, reg_level + # PUSH_REG reg_result + stack, clear = self.stack.read(2) + reg_level = stack[0] + reg_from = stack[1] + result = self.new_register() + self[index:index+1] = ( + (instr.copy('IMPORT_NAME_REG', result, instr[0], reg_from, reg_level),) + + self.stack.clear_instrs(clear, instr) + + (instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result),) + ) + return index+1 + + if (instr.name == 'IMPORT_FROM' + and len(self.stack) >= 1): + # PUSH_REG reg_module + # IMPORT_NAME name + # => + # PUSH_REG reg_module + # IMPORT_FROM_REG reg_result, reg_module, name + # PUSH_REG reg_result + reg_module = self.stack.peek(1) + result = self.new_register() + self[index:index+1] = ( + instr.copy('IMPORT_FROM_REG', result, reg_module, instr[0]), + instr.copy('PUSH_REG', result), + instr.copy('CLEAR_REG', result), + ) + return index+1 + + if (instr.name == 'IMPORT_STAR' + and len(self.stack) >= 1): + # PUSH_REG reg_module + # IMPORT_STAR + # => + # PUSH_REG reg_module + # IMPORT_STAR_REG reg_module + reg_module = self.stack.peek(1) + self[index:index+1] = ( + instr.copy('IMPORT_STAR_REG', reg_module), + ) + return index+1 + + if instr.name in ("PUSH_REG",): + self.stack.push(self, index, instr[0]) + elif instr.name in ("CLEAR_REG",): + self.stack.clear_reg(self, index, instr[0]) + elif instr.name in ("YIELD_VALUE", 'YIELD_REG'): + if self.stack.has_yield_result: + raise ValueError("yield result already set") + self.stack.has_yield_result = True + elif instr.name in ('POP_TOP', 'STORE_NAME'): + if self.stack.has_yield_result: + self.stack.has_yield_result = False + else: + if len(self.stack) == 0: + raise NotImplementedError("%s whereas the stack is empty" % instr) + self.stack.pop() + elif instr.name == "POP_BLOCK": + # FIXME: is it correct to clear the stack? + self.stack.clear() + elif instr.use_stack(): + raise NotImplementedError("unknown instruction using stack: %s" % instr) + #elif instr.is_cond_jump(): + # raise NotImplementedError("unable to track the stack with conditional jump: %s" % instr) + + def step_peepholer2(self, instr, index): + if instr.name != "CLEAR_REG": + for arg in instr: + if not isinstance(arg, (Local, Register)): + continue + reg = arg.value + if reg in self.cleared_registers: + self.cleared_registers.remove(reg) + + if instr.name == 'NOP': + # Remove NOP instructions + del self[index] + return 0 + + if (instr.name == "CLEAR_REG" + and 1 <= index + and self[index-1].name == "POP_REG" + and self[index-1][0] == instr[0]): + # POP_REG reg + # CLEAR_REG reg + # => + # POP_TOP + reg = instr[0] + self[index-1:index+1] = (instr.copy('POP_TOP'),) + + while index < len(self): + instr = self[index] + if instr.name == "CLEAR_REG" and instr[0] == reg: + self[index] = instr.copy("NOP") + index += 1 + return 0 + + if (self.config.remove_dead_code + and instr.name in ('JUMP_ABSOLUTE', 'JUMP_FORWARD')): + block = instr[0].value.value + if block.index == self.index + 1: + # block1: + # ... + # JUMP_FORWARD block2 + # block2: + # ... + # => + # block1: + # ... + # block2: + # ... + self.converter.warning("Remove useless jump %s in %s" % (instr, self)) + del self[index] + return 0 + + if (instr.name == 'MOVE_REG' + and instr[0] == instr[1]): + # MOVE_REG reg1, reg1 + del self[index] + return 0 + + if instr.name == "CLEAR_REG": + reg = instr[0].value + if reg in self.cleared_registers: + # CLEAR_REG reg1 + # ... + # CLEAR_REG reg1 + # => + # CLEAR_REG reg1 + del self[index] + return 0 + else: + self.cleared_registers.add(reg) + + if instr.name in ("RETURN_VALUE", "RETURN_VALUE_REG"): + if index != len(self)-1: + for after in self[index+1:]: + self.converter.info("Remove useless %s after %s", after, instr) + del self[index+1:] + + # FIXME: don't remove CLEAR_REG before RETURN? + delta = -1 + while index+delta >= 0 and self[index+delta].name == "CLEAR_REG": + self.converter.info("Remove useless %s before %s", self[index+delta], instr) + del self[index+delta] + delta -= 1 + if delta != -1: + return 1 + delta + + def convert_step3(self): + self.converter.clear_free_registers() + self._patch(self.step_reg_alloc) + self._patch(self.step_reg_alloc2) + self.cleared_registers = set() + self._patch(self.step_peepholer2) + + def step_reg_alloc2(self, instr, index): + if instr.name == "CLEAR_REG": + reg = instr[0] + self.converter.add_free_register(self, index, reg) + return + + if (instr.name == 'MOVE_REG' + and isinstance(instr[0], Register)): + oldreg = instr[0] # reg + newreg = instr[1] # reg or local + if not self.is_reg_modified(oldreg, index+1, loop=False): + # MOVE_REG reg, var + # ... + # BINARY_ADD_REG result, var, reg + # => + # ... + # BINARY_ADD_REG result, var, var + if DEBUG_REGALLOC: + self.converter.dump("Before replace 1") + print("REGALLOC: Remove %s: replace register %s with %s" % (instr, oldreg, newreg)) + del self[index] + self.replace_register(self, index, oldreg, newreg) + if DEBUG_REGALLOC: + self.converter.dump("After replace 1") + print() + return 0 + + if (instr.name in OPCODES_REPLACE_REG + and instr.name not in ("FOR_ITER_REG", "UNPACK_SEQUENCE_REG")): + regs = instr.get_registers() + oldreg, regs = regs[0], regs[1:] + loop = (instr.name == "GET_ITER_REG") + if (isinstance(oldreg, Register) + and oldreg not in regs + and not self.is_reg_replaced(oldreg, index+1, loop=loop)): + for newreg in regs: + if DEBUG_REGALLOC: + print("REGALLOC: Can reuse argument %s for %s?" % (newreg, instr)) + if not isinstance(newreg, Register): + continue + if not self.is_reg_used(newreg, index+1, ignore_clear=True, loop=loop): + # LOAD_ATTR_REG result, owner, 'attr' + # ... + # PUSH_REG owner + # PUSH_REG result + # => + # LOAD_ATTR_REG owner, owner, 'attr' + # ... + # PUSH_REG owner + # PUSH_REG owner + if DEBUG_REGALLOC: + self.converter.dump("Before replace 2") + print("REGALLOC: Replace %s with argument %s in %s" % (oldreg, newreg, instr)) + instr[0] = newreg + self.replace_register(self, index+1, oldreg, newreg) + if DEBUG_REGALLOC: + self.converter.dump("After replace 2") + print() + return + if DEBUG_REGALLOC: + print("REGALLOC: Cannot replace %s with %s in %s" % (oldreg, newreg, instr)) + for reg_index, freereg in self.converter.iter_free_registers(): + if DEBUG_REGALLOC: + print("REGALLOC: Can reuse free register %s for %s?" % (freereg, instr)) + if not self.is_reg_used(freereg, index+1, ignore_clear=True, loop=loop): + # LOAD_CONST_REG reg1, const + # CLEAR_REG reg1 + # ... + # LOAD_CONST_REG reg2, const + # => + # LOAD_CONST_REG reg1, const + # ... + # LOAD_CONST_REG reg1, const + if DEBUG_REGALLOC: + self.dump("Before replace 3") + print("REGALLOC: Replace %s with free reg %s in %s" % (oldreg, freereg, instr)) + print() + self.converter.remove_free_register(reg_index) + instr[0] = freereg + self.replace_register(self, index+1, oldreg, freereg) + return + if DEBUG_REGALLOC: + print("REGALLOC: Can reuse free register %s for %s? NO" % (freereg, instr)) + + def step_reg_alloc(self, instr, index): + if (instr.name == 'MOVE_REG' + and isinstance(instr[0], Local)): + # BINARY_ADD_REG reg3, reg2, reg1 + # ... + # MOVE_REG var, reg3 + # CLEAR_REG reg3 + # => + # BINARY_ADD_REG var, reg2, reg1 + # ... + oldreg = instr[1] # local or reg + newreg = instr[0] # local + + found = None + move_index = index + for prev_block, prev_index, prev_instr in self.iter_instr(end=index, backward=True): + if prev_instr.is_reg_replaced(oldreg): + found = (prev_block, prev_index, prev_instr) + break + if prev_instr.is_reg_modified(oldreg): + break + if found is not None: + prev_block, prev_index, prev_instr = found + ireg = prev_instr.index_modified_reg(oldreg) + if DEBUG_REGALLOC: + self.converter.dump() + print("REGALLOC: Remove %s: replace %s with register %s in %s" + % (self[move_index], oldreg, newreg, prev_instr)) + print() + prev_instr[ireg] = newreg + del self[move_index] + self.replace_with_local(prev_block, prev_index+1, oldreg, newreg) + return 0 + + def can_move_load_const(self, instr, load_instr): + if instr.name == 'LOAD_CONST_REG': + return False + if instr.is_reg_used(load_instr[0]): + return False + return True + + def can_move_load_attr(self, instr, load_instr): + if instr.name in ('LOAD_CONST_REG', 'LOAD_ATTR_REG'): + return False + if instr.is_reg_used(load_instr[0]): + return False + if instr.is_reg_used(load_instr[1]): + return False + return True + + def can_move_load_global(self, instr, load_instr): + if instr.name in ('LOAD_CONST_REG', 'LOAD_ATTR_REG', 'LOAD_GLOBAL_REG'): + return False + if instr.name == 'STORE_GLOBAL_REG' and instr[1] == load_instr[1]: + return False + if instr.is_reg_used(load_instr[0]): + return False + return True + + def may_move_instr(self, move_instr, move_index, can_move, stop_on_try=True): + # Can we move the instruction? + found = None + instr_it = self.iter_instr(end=move_index, stop_on_try=stop_on_try, move_instr=True, backward=True) + for block, index, instr in instr_it: + if not can_move(instr, move_instr): + break + found = (block, index, instr) + if found is None: + return False + + # The instruction will be moved: check if the register is cleared below + clear_reg = None + if self.loop_start != found[0].loop_start: + reg = move_instr[0] + instr_it = self.iter_instr(move_index, stop_on_try=stop_on_try, move_instr=True, only_loop=True) + for block, index, instr in instr_it: + if instr.name == "CLEAR_REG" and instr[0] == reg: + clear_reg = (block, index, instr) + break + + # Move CLEAR_REG (if any) + if clear_reg is not None: + block, index, instr = clear_reg + block[index] = instr.copy('NOP') + block_after = self.get_block_after_loop() + block_after[0:0] = (instr,) + + # Move the instruction + block, index, instr = found + self[move_index] = move_instr.copy('NOP') + block[index:index+1] = (move_instr, instr) + + return True + + def step_move_instr(self, instr, index): + # Move LOAD_CONST_REG to the beginning + if (self.config.move_load_const + and instr.name == 'LOAD_CONST_REG'): + # PUSH_REG reg1 + # ... + # LOAD_CONST_REG reg2, const + # => + # LOAD_CONST_REG reg2, const + # ... + # PUSH_REG reg1 + if self.may_move_instr(instr, index, self.can_move_load_const, stop_on_try=False): + return + + # Move LOAD_ATTR_REG to the beginning + if (self.config.move_load_attr + and instr.name == 'LOAD_ATTR_REG'): + # PUSH_REG reg1 + # LOAD_ATTR_REG reg3, reg2, name + # => + # LOAD_ATTR_REG reg3, reg2, name + # PUSH_REG reg1 + if self.may_move_instr(instr, index, self.can_move_load_attr): + return + + # Move LOAD_GLOBAL_REG to the beginning + if (self.config.move_load_global + and instr.name == 'LOAD_GLOBAL_REG'): + # PUSH_REG reg1 + # ... + # LOAD_GLOBAL_REG reg2, name + # => + # LOAD_GLOBAL_REG reg2, name + # ... + # PUSH_REG reg1 + if self.may_move_instr(instr, index, self.can_move_load_global): + return + + def convert_merge_duplicate_load(self): + self.load_consts = RegisterTracker() + self.load_names = RegisterTracker() + self.load_globals = RegisterTracker() + # FIXME: merge duplicate LOAD_ATTR_REG + ## # name => {attr => reg} (used by step_merge_duplicate_load method) + ## self.load_attrs = None + self._patch(self.step_merge_duplicate_load) + + def step_merge_duplicate_load(self, instr, index): + self.load_consts.step(self, index, instr) + self.load_names.step(self, index, instr) + self.load_globals.step(self, index, instr) + + if instr.name == "STORE_NAME_REG": + self.load_names[instr[0]] = instr[1] + + if instr.name == "STORE_GLOBAL_REG": + self.load_globals[instr[0]] = instr[1] + + if (self.config.merge_duplicate_load_const + and instr.name == "LOAD_CONST_REG"): + if instr[1] in self.load_consts: + # LOAD_CONST_REG reg1, var + # ... + # LOAD_CONST_REG reg2, var + # => + # LOAD_CONST_REG reg1, var + # ... + # MOVE_REG reg2, reg1 + reg = self.load_consts.get(instr[1]) + self[index:index+1] = ( + instr.copy("MOVE_REG", instr[0], reg), + instr.copy("CLEAR_REG", reg), + ) + return + else: + self.load_consts[instr[1]] = instr[0] + + if instr.name == "LOAD_NAME_REG": + if instr[1] in self.load_names: + # STORE_NAME_REG var, reg1 + # ... + # LOAD_NAME_REG reg2, var + # => + # STORE_NAME_REG var, reg1 + # ... + # MOVE_REG reg2, reg1 + reg = self.load_names.get(instr[1]) + self[index:index+1] = ( + instr.copy("MOVE_REG", instr[0], reg), + instr.copy("CLEAR_REG", reg), + ) + return + else: + self.load_names[instr[1]] = instr[0] + + if (self.config.merge_duplicate_load_global + and instr.name == "LOAD_GLOBAL_REG"): + if instr[1] in self.load_globals: + # STORE_GLOBAL_REG var, reg1 + # ... + # LOAD_GLOBAL_REG reg2, var + # => + # STORE_GLOBAL_REG var, reg1 + # ... + # MOVE_REG reg2, reg1 + reg = self.load_globals.get(instr[1]) + self[index:index+1] = ( + instr.copy("MOVE_REG", instr[0], reg), + instr.copy("CLEAR_REG", reg) + ) + return + else: + self.load_globals[instr[1]] = instr[0] + + ## if (instr.name in OPCODES_WRITE_INTO + ## and isinstance(instr[0], Local)): + ## # BINARY_ADD_REG var, reg1, reg2 + ## # => don't merge attributes of var + ## reg = instr[0] + ## if reg in self.load_attrs: + ## del self.load_attrs[reg] + ## if instr.name == "STORE_ATTR_REG": + ## # STORE_ATTR_REG owner, attr, value + ## owner = instr[0] + ## if owner in self.load_attrs: + ## del self.load_attrs[owner] + + ## if (self.config.merge_duplicate_load_attr + ## and instr.name == 'LOAD_ATTR_REG'): + ## result = instr[0] + ## owner = instr[1] + ## attr = instr[2] + ## key = (owner, attr) + + ## try: + ## reg2 = self.load_attrs[owner][attr] + ## except KeyError: + ## if owner not in self.load_attrs: + ## self.load_attrs[owner] = {} + ## self.load_attrs[owner][attr] = result + ## else: + ## self[index] = instr.copy('MOVE_REG', result, reg2) + ## return + + +class Converter: + """ + Rewrite the bytecode of a code object to use registers instead + of the stack. Main methods: + + - convert(): rewrite the bytecode + - compile(): return a new code object + """ + def __init__(self, code, config=None): + self.code = code + if config is not None: + self.config = config + else: + self.config = Config() + + self._first_local = code.co_stacksize + self._first_register = code.co_stacksize + code.co_nlocals + len(code.co_cellvars) + len(code.co_freevars) + 1 + self._next_register = 0 + self._free_registers = collections.deque() + + self.instructions = disassemble(self) + self.blocks = None + + def new_register(self): + reg = self._next_register + self._next_register += 1 + return Register(self, reg) + + def clear_free_registers(self): + self._free_registers.clear() + + def add_free_register(self, block, index, reg): + if not isinstance(reg, Register): + return + item = (reg, block, index) + self._free_registers.append(item) + + def iter_free_registers(self): + for index, item in enumerate(self._free_registers): + yield (index, item[0]) + + def remove_free_register(self, reg_index): + reg, block, index = self._free_registers[reg_index] + block[index] = Instruction(self.code, name='NOP') + del self._free_registers[reg_index] + + def _iter_instr(self): + for block in self.blocks: + for instr in block: + yield instr + + def recompute_labels(self): + # Compute absolute position of labels and remove labels + labels = {} + abs_pos = 0 + for block in self.blocks: + labels[block] = abs_pos + abs_pos += block.get_size() + + # Replace label arguments by their value + pos = 0 + for instr in self._iter_instr(): + pos += instr.get_size() + for arg in instr.arguments: + if not(isinstance(arg, Immediate) and isinstance(arg.value, Label)): + continue + block = arg.value.value + abs_pos = labels[block] + if arg.arg_type == 'jrel': + arg.value = abs_pos - pos + else: + arg.value = abs_pos + # an instruction cannot have more than one label + break + + def renumber_registers(self): + self._next_register = 0 + mapping = {} + for instr in self.instructions: + for index, arg in enumerate(instr.arguments): + if not isinstance(arg, Register): + continue + if arg.value not in mapping: + mapping[arg.value] = self.new_register() + arg = mapping[arg.value] + instr.arguments[index] = arg + + nreg = self._next_register - self._first_register + if nreg >= FRAME_NREGISTER: + raise Exception("too much registers!") + + def create_blocks(self): + # compute labels + pos = 0 + labels = set() + for instr in self.instructions: + pos += instr.get_size() + for arg in instr.arguments: + if arg.arg_type == 'jabs': + labels.add(arg.value) + elif arg.arg_type == 'jrel': + labels.add(pos + arg.value) + + # split blocks + pos = 0 + index = 0 + start = 0 + blocks = [] + block_labels = {} + previous = None + previous_instr = None + previous_pos = None + while index < len(self.instructions): + instr = self.instructions[index] + pos += instr.get_size() + + link = True + if pos in labels: + split = True + elif index == len(self.instructions)-1: + split = True + elif instr.is_terminal(): + split = True + else: + split = False + for arg in instr.arguments: + if arg.arg_type in ('jabs', 'jrel'): + split = True + break + + if split: + if instr.is_terminal(): + link = False + + block = Block(self, self.instructions[start:index+1], len(blocks)) + if not blocks: + block.block_type = 'start' + blocks.append(block) + if previous is not None: + if previous_instr.is_cond_jump(): + link_type = 'jump if' + block.block_type = 'if (#%s)' % (1 + previous.index) + else: + link_type = 'consecutive' + previous.add_link(Link(link_type, block)) + if link: + previous = block + else: + previous = None + previous_instr = instr + if previous_pos is not None: + block_labels[previous_pos] = block + previous_pos = pos + start = index + 1 + + index += 1 + block_labels[previous_pos] = previous + + # link blocks + labels = {} + pos = 0 + for block_index, block in enumerate(blocks): + for instr in block.instructions: + pos += instr.get_size() + jump_arg = None + for arg in instr.arguments: + if arg.arg_type in ('jabs', 'jrel'): + jump_arg = arg + break + if jump_arg is None: + continue + if jump_arg.arg_type == 'jabs': + abs_pos = jump_arg.value + elif jump_arg.arg_type == 'jrel': + abs_pos = pos + jump_arg.value + else: + continue + + dest_block = block_labels[abs_pos] + + if instr.name == "SETUP_EXCEPT": + name = 'except_%s' % (1 + len(labels)) + label_type = 'except' + next_block = blocks[block_index+1] + next_block.block_type = 'try' + dest_block.block_type = 'except' + elif instr.name == "SETUP_LOOP": + name = 'setup_loop_%s' % (1 + len(labels)) + label_type = 'setup_loop' + elif instr.name in ("FOR_ITER", "FOR_ITER_REG"): + name = 'for_iter_%s' % (1 + len(labels)) + label_type = 'except StopIteration' + elif instr.is_cond_jump(): + name = 'cond_%s' % (1 + len(labels)) + label_type = 'jump else' + dest_block.block_type = 'else (#%s)' % (1 + block.index) + elif instr.name == "SETUP_FINALLY": + name = 'finally_%s' % (1 + len(labels)) + label_type = 'finally' + elif instr.name in ("JUMP_ABSOLUTE", "JUMP_FORWARD"): + name = 'label_%s' % (1 + len(labels)) + label_type = 'jump' + else: + name = 'label_%s' % (1 + len(labels)) + label_type = 'unknown' + + block.add_link(Link(label_type, dest_block)) + for arg in instr.arguments: + if arg.arg_type in ('jrel', 'jabs'): + arg.value = Label(self.code, dest_block, arg.arg_type, label_type) + break + return blocks + + def iter_blocks(self, block=None, backward=False, stop_on_try=False, move_instr=False, loop=True): + seen = set() + loops = set() + blocks = collections.deque() + if move_instr: + loop = False + + blocks.append(block) + while blocks: + ignore = False + if move_instr and len(blocks) > 1: + # We are in a conditional branch: search a common parent + blocks = list(blocks) + blocks.sort(key=lambda block: block.index, reverse=True) + blocks = collections.deque(blocks) + ignore = True + block = blocks.popleft() + + if stop_on_try and block.block_type in ('try', 'except'): + return + + if not ignore: + if block in seen: + if loop: + # emit the same block again to finish iterating on the loop + yield block + continue + + seen.add(block) + yield block + else: + seen.add(block) + + if backward: + links = block.in_links + else: + links = block.out_links + for link in links: + if move_instr and link.block.index > block.index: + # when moving instructions, we only want to move to the + # beginning (and not to the end) + continue + if move_instr: + if link.block in seen: + continue + if link.block in blocks: + continue + blocks.append(link.block) + + def info(self, message, *args): + if self.config.debug: + if args: + message %= args + print(message) + + def warning(self, message): + if not self.config.quiet: + print(message) + + def remove_dead_blocks(self): + while 1: + removed = [] + blocks = [] + for index, block in enumerate(self.blocks): + if 1 <= index and not block.in_links: + text = str(block) + instrs = tuple(block) + if instrs: + text += " (%s)" % ', '.join(map(str, instrs)) + self.warning("Remove dead block: %s" % text) + removed.append(block) + continue + blocks.append(block) + self.blocks = blocks + if not removed: + break + for removed_block in removed: + for block in blocks: + block.unref_block(removed_block) + + def convert(self, + dump_original=False, dump_optimized=True, + dump_bytecode=False, dump_blocks=False): + if dump_original and dump_bytecode: + self.dump(prefix="original bytecode") + + self.blocks = self.create_blocks() + detect_loops(self.blocks[0], set()) + self.instructions = None + + if dump_original and dump_blocks: + self.dump(prefix="original blocks") + for block in self.blocks: + block.convert_step1() + + stacks = {} + for block in self.iter_blocks(self.blocks[0]): + link = block.get_in_link('consecutive') + if link is not None and link.block in stacks: + stack = stacks[link.block].copy() + else: + stack = Stack() + try: + block.convert_with_stack(stack) + except NotImplementedError as err: + self.warning("STACK: NOT SUPPORTED: %s" % err) + else: + stacks[block] = block.stack + + for block in self.blocks: + if not block.in_loop: + continue + block.convert_move_instr() + for block in self.blocks: + block.convert_merge_duplicate_load() + for block in self.blocks: + block.convert_step3() + + if self.config.remove_dead_code: + self.remove_dead_blocks() + + if dump_optimized and dump_blocks: + self.dump(prefix="optimized blocks") + self.recompute_labels() + + self.instructions = [] + for block in self.blocks: + self.instructions.extend(block.instructions) + self.blocks = None + + self.renumber_registers() + if dump_optimized and dump_bytecode: + self.dump(prefix="optimized bytecode") + + def check_inefficient_code(self, max_warning=5): + nwarning = 0 + for instr in self.instructions: + if instr.name not in ('PUSH_REG', 'POP_REG'): + continue + if nwarning >= max_warning: + print("!!! SUBOPTIMAL CODE !!! (...)") + return + print("!!! SUBOPTIMAL CODE !!! %s" % instr) + nwarning += 1 + + def dump(self, prefix=None, with_name=False): + name = "%s:%s" % (self.code.co_filename, self.code.co_firstlineno) + if prefix: + name = "%s:%s" % (prefix, name) + if self.blocks is not None: + ninstr = len(tuple(self._iter_instr())) + title = "============= %s: %s blocks (%s instructions) ========" % (name, len(self.blocks), ninstr) + print(title) + pos = 0 + for index, block in enumerate(self.blocks): + print(block) + for link in block.in_links: + print("<-- %s --- %s" % (link.link_type, link.block)) + pos = block.dump(pos=pos) + for link in block.out_links: + print("--- %s --> %s" % (link.link_type, link.block)) + if index != len(self.blocks)-1: + print() + print(title) + else: + print("%s (%s instructions):" % (name, len(self.instructions))) + pos = 0 + for instr in self.instructions: + size = instr.get_size() + print("%3i: %s" % (pos, instr.format(pos+size, with_name=with_name))) + pos += size + + def compile(self): + # FIXME: recompute the stack size, need to recompute the index of + # FIXME: cell and free variables (Register and Local already handles + # FIXME: the renumbering correctly) + bytecode = b''.join( + instr.compile() + for instr in self.instructions + if instr.name is not LABEL) + return patch_code_obj(self.code, bytecode=bytecode) + + +def optimize_code(code, config=None, **options): + converter = Converter(code, config) + converter.convert(**options) + return converter.compile() + +def optimize_func(func, config=None, **options): + if isinstance(func, types.MethodType): + return optimize_func(func.__func__, config) + new_code = optimize_code(func.__code__, config, **options) + func.__code__ = new_code + diff -r fb80df16c4ff -r 6a9ca0177020 Lib/site.py --- a/Lib/site.py Tue Oct 23 21:14:34 2012 +0300 +++ b/Lib/site.py Tue May 20 11:04:02 2014 +0200 @@ -586,6 +586,27 @@ if ENABLE_USER_SITE: execusercustomize() + if 0: + try: + from registervm import optimize_code, Config + except ImportError as err: + print("Failed to import registervm module: %s" % err) + else: + config = Config() + config.quiet = False + config.enable_unsafe_optimizations() + + def code_optimizer(co): + try: + new_code = optimize_code(co, config) + except Exception as err: + print("Failed to optimize bytecode: [%s] %s" + % (type(err).__name__, err)) + return co + return new_code + sys.setcodeoptimizer(code_optimizer) + print("Enable code optimizer (registervm)") + # Prevent edition of sys.path when python was started with -S and # site is imported later. if not sys.flags.no_site: diff -r fb80df16c4ff -r 6a9ca0177020 Lib/test/test_registervm.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_registervm.py Tue May 20 11:04:02 2014 +0200 @@ -0,0 +1,920 @@ +from test import support +import gc +import re +import registervm +import sys +import types +import unittest + +def unary_not(x): + x = not x + return x + +def noop(): + return + +def loop_sum(n): + total = 0 + for i in range(n): + total += i + return total + +def fact(n): + if n < 2: + return 1 + else: + return n * fact(n-1) + +def fact_loop(n): + f = 1 + for i in range(2, n+1): + f *= i + return f + +def pow2(n): + x = 1 + for i in range(n): + x *= 2 + return x + +def bisect(a, x): + lo = 0 + hi = len(a) + while lo < hi: + mid = (lo+hi)//2 + if x < a[mid]: + hi = mid + else: + lo = mid+1 + return lo + +def sieve(n): + primes = list(range(2, n+1)) + for i in primes: + j = 2 + while i * j <= primes[-1]: + if i * j in primes: + primes.remove(i*j) + j += 1 + return primes + +def merge_load(): + lst = [] + lst.append(len("a")) + lst.append(len("a")) + lst.append(len("a")) + return lst + +def clear_reg(value): + lst = () + # useless instruction + lst[:] + return lst + +def partition(sequence, l, e, g): + if not sequence: + return (l, e, g) + else: + head = sequence[0] + if head < e[0]: + return partition(sequence[1:], l + [head], e, g) + elif head > e[0]: + return partition(sequence[1:], l, e, g + [head]) + else: + return partition(sequence[1:], l, e + [head], g) + +def qsort2(sequence): + if not sequence: + return [] + else: + pivot = sequence[0] + lesser, equal, greater = partition(sequence[1:], [], [pivot], []) + return qsort2(lesser) + equal + qsort2(greater) + +def make_func(): + def f(a, *b, c=5): + return (a, b, c) + return f("a", "b1", "b2") + +def store_map(): + mapping = {"key1": 1, "key2": 2} + return mapping + +def store_subscr(): + d = {} + d["key"] = "value" + return d + +def build_list_tuple(): + x = 1 + y = 2 + l = [x, y] + return (x, y, l) + +def delete_subscr(): + numbers = [1, 2, 3, 4, 5] + del numbers[1:3] + return numbers + +def loop_concat(): + s = 'om' + for i in range(3): + s = s + 'xbx' + return s + +def dictcomp(): + return {x:x for x in "abc"} + +def yield_value(): + x = 3 + yield x + +def yield_use_result(): + x = 3 + y = yield x + yield y + +def getline(lines, lineno): + if 1 <= lineno <= len(lines): + return lines[lineno-1] + else: + return '' + +def move_instr(): + x = "x" + try: + y = x.unknown_attr + 1 + except AttributeError: + y = 1 + +def loop_compare_str(): + t = "t" + s = "s" + for loop in range(3): + # useless instruction + t >= s + return True + +def set_attr(obj): + obj.attr1 += 1 + obj.attr2 = 3 + +def get_loader(module_or_name): + # regression test extracted from the pkgutil module of Python 3.4 + if module_or_name: + loader = getattr(module_or_name, '__loader__', None) + fullname = module_or_name.__name__ + else: + fullname = module_or_name + return find_loader(fullname) + +def find_loader(name, path=None): + try: + loader = sys.modules[name].__loader__ + if loader is None: + raise ValueError('{}.__loader__ is None'.format(name)) + else: + return loader + except KeyError: + pass + return _bootstrap._find_module(name, path) + +def store_load_global(): + global global_var + global_var = 5 + return global_var +global_var = None + +class OptimizeTests(unittest.TestCase): + maxDiff = 80*80 + + def setUp(self): + # enable most optimizations + self.config = registervm.Config() + self.config.quiet = True + self.config.enable_unsafe_optimizations() + self.config.enable_buggy_optimizations() + + def optimize_code(self, code): + converter = registervm.Converter(code, self.config) + converter.convert() + code = converter.compile() + return converter, code + + def optimize_func(self, func): + if isinstance(func, types.MethodType): + func = func.__func__ + converter, code = self.optimize_code(func.__code__) + func.__code__ = code + return converter + + def optimize_expr(self, code_str): + code = compile(code_str, "", "exec") + return self.optimize_code(code) + + def get_bytecode(self, converter): + text = '\n'.join(str(instr) for instr in converter.instructions) + return re.sub('at 0x[a-f0-9]+, file "[^"]+", line [0-9]+', + 'at (...)', + text) + + def check_bytecode(self, code, expected): + """ + Check the formatted code: expected can contain "(...)" which matches + any string. + """ + expected = expected.strip() + text = self.get_bytecode(code) + self.assertEqual(expected, text) + + def can_reoptimize(self, code): + # Optimize a function twice must not fail + if isinstance(code, types.CodeType): + code_obj = code + else: + code_obj = code.__code__ + converter = registervm.Converter(code_obj, self.config) + before = self.get_bytecode(converter) + + if isinstance(code, types.CodeType): + converter2, code2 = self.optimize_code(code) + else: + converter2 = self.optimize_func(code) + after = self.get_bytecode(converter2) + self.assertEqual(before, after) + + def check_exec(self, expected, func, *args, is_generator=False, reoptimize=True): + # FIXME: always reoptimize + if reoptimize: + self.can_reoptimize(func) + + if is_generator: + generator = func + def read_generator(): + return list(generator(*args)) + func = read_generator + args = () + result = func(*args) + self.assertEqual(result, expected) + if hasattr(sys, "gettotalrefcount"): + for prepare in range(50): + func(*args) + gc.collect() + + before = None + before = sys.gettotalrefcount() + for repeat in range(3): + func(*args) + gc.collect() + gc.collect() + del repeat + leak = sys.gettotalrefcount() - before + if leak: + self.fail("reference leak: %s" % leak) + + def exec_code(self, code, reoptimize=False): + # FIXME: always reoptimize + if reoptimize: + self.can_reoptimize(code) + + ns = {} + exec(code, ns, ns) + return ns + + def check_expr(self, varname, expected, code): + ns = self.exec_code(code) + value = ns[varname] + self.assertEqual(value, expected) + + def test_unary_not(self): + converter = self.optimize_func(unary_not) + self.check_bytecode(converter, """ +UNARY_NOT_REG 'x', 'x' +RETURN_VALUE_REG 'x' + """) + self.check_exec(False, unary_not, True) + + def test_store_map(self): + converter = self.optimize_func(store_map) + self.check_bytecode(converter, """ +BUILD_MAP_REG 'mapping', 2 +LOAD_CONST_REG R0, 1 (const#1) +LOAD_CONST_REG R1, 'key1' (const#2) +STORE_MAP_REG 'mapping', R1, R0 +LOAD_CONST_REG R0, 2 (const#3) +LOAD_CONST_REG R1, 'key2' (const#4) +STORE_MAP_REG 'mapping', R1, R0 +RETURN_VALUE_REG 'mapping' + """) + self.check_exec({'key1': 1, 'key2': 2}, store_map) + + def test_store_subscr(self): + converter = self.optimize_func(store_subscr) + self.check_bytecode(converter, """ +BUILD_MAP_REG 'd', 0 +LOAD_CONST_REG R0, 'value' (const#1) +LOAD_CONST_REG R1, 'key' (const#2) +STORE_SUBSCR_REG 'd', R1, R0 +RETURN_VALUE_REG 'd' + """) + self.check_exec({'key': 'value'}, store_subscr) + + def test_build_list_tuple(self): + converter = self.optimize_func(build_list_tuple) + self.check_bytecode(converter, """ +LOAD_CONST_REG 'x', 1 (const#1) +LOAD_CONST_REG 'y', 2 (const#2) +BUILD_LIST_REG 'l', 2, 'x', 'y' +BUILD_TUPLE_REG R0, 3, 'x', 'y', 'l' +RETURN_VALUE_REG R0 + """) + self.check_exec((1, 2, [1, 2]), build_list_tuple) + + def test_delete_subscr(self): + converter = self.optimize_func(delete_subscr) + self.check_bytecode(converter, """ +LOAD_CONST_REG R0, 1 (const#1) +LOAD_CONST_REG R1, 2 (const#2) +LOAD_CONST_REG R2, 3 (const#3) +LOAD_CONST_REG R3, 4 (const#4) +LOAD_CONST_REG R4, 5 (const#5) +BUILD_LIST_REG 'numbers', 5, R0, R1, R2, R3, R4 +CLEAR_REG R1 +CLEAR_REG R3 +CLEAR_REG R4 +BUILD_SLICE2_REG R0, R0, R2 +CLEAR_REG R2 +DELETE_SUBSCR_REG 'numbers', R0 +RETURN_VALUE_REG 'numbers' + """) + # FIXME: reoptimize reuses R3 for "LOAD_CONST_REG R2, 3 (const#3)" + self.check_exec([1, 4, 5], delete_subscr, reoptimize=False) + + def test_loop_concat(self): + converter = self.optimize_func(loop_concat) + self.check_bytecode(converter, """ +LOAD_CONST_REG 's', 'om' (const#1) +SETUP_LOOP +LOAD_GLOBAL_REG R0, 'range' (name#0) +LOAD_CONST_REG R1, 3 (const#2) +LOAD_CONST_REG R2, 'xbx' (const#3) +CALL_FUNCTION_REG R0, R0, (1 positional), R1 +CLEAR_REG R1 +GET_ITER_REG R0, R0 +FOR_ITER_REG 'i', R0, +BINARY_ADD_REG 's', 's', R2 +JUMP_ABSOLUTE 40 +CLEAR_REG R2 +CLEAR_REG R0 +POP_BLOCK +RETURN_VALUE_REG 's' + """) + self.check_exec("omxbxxbxxbx", loop_concat, reoptimize=False) + + # FIXME: fix MAP_ADD_REG and stack tracker + @unittest.skipIf(True, "FIXME") + def test_dictcomp(self): + converter = self.optimize_func(dictcomp) + self.check_bytecode(converter, """ +LOAD_CONST_REG R0, at (...)> (const#1) +LOAD_CONST_REG R1, 'dictcomp..' (const#2) +MAKE_FUNCTION_REG R1, R1, R0, 0, 0, 0 +LOAD_CONST_REG R0, 'abc' (const#3) +GET_ITER_REG R0, R0 +CALL_FUNCTION_REG R1, R1, (1 positional), R0 +RETURN_VALUE_REG R1 + """) + + func_code = dictcomp.__code__ + const_index = 1 + converter2, code2 = self.optimize_code(func_code.co_consts[const_index]) + self.check_bytecode(converter2, """ +BUILD_MAP_REG R0, 0 +PUSH_REG R0 +CLEAR_REG R0 +FOR_ITER_REG 'x', '.0', +MAP_ADD_REG R0, 'x', 'x' +JUMP_ABSOLUTE 11 +RETURN_VALUE + """) + dictcomp.__code__ = registervm.patch_code_obj(func_code, const={const_index: code2}) + + self.check_exec({"a": "a", "b": "b", "c": "c"}, dictcomp, reoptimize=False) + + def test_yield_value(self): + converter = self.optimize_func(yield_value) + self.check_bytecode(converter, """ +LOAD_CONST_REG 'x', 3 (const#1) +YIELD_REG 'x' +POP_TOP +LOAD_CONST_REG R0, None (const#0) +RETURN_VALUE_REG R0 + """) + + self.check_exec([3], yield_value, is_generator=True) + + def test_yield_use_result(self): + converter = self.optimize_func(yield_use_result) + self.check_bytecode(converter, """ +LOAD_CONST_REG 'x', 3 (const#1) +YIELD_REG 'x' +POP_REG 'y' +YIELD_REG 'y' +POP_TOP +LOAD_CONST_REG R0, None (const#0) +RETURN_VALUE_REG R0 + """) + + self.check_exec([3, None], yield_use_result, is_generator=True) + + def test_getline(self): + converter = self.optimize_func(getline) + self.check_bytecode(converter, """ +LOAD_CONST_REG R0, 1 (const#1) +PUSH_REG 'lineno' +COMPARE_REG R0, '<=', R0, 'lineno' +JUMP_IF_FALSE_REG R0, 48 +LOAD_GLOBAL_REG R1, 'len' (name#0) +CALL_FUNCTION_REG R1, R1, (1 positional), 'lines' +PUSH_REG R1 +CLEAR_REG R1 +COMPARE_OP '<=' +JUMP_FORWARD +POP_REG R0 +POP_TOP +JUMP_IF_FALSE_REG R0, 79 +LOAD_CONST_REG R2, 1 (const#1) +BINARY_SUBTRACT_REG R2, 'lineno', R2 +BINARY_SUBSCR_REG R2, 'lines', R2 +RETURN_VALUE_REG R2 +LOAD_CONST_REG R0, '' (const#2) +RETURN_VALUE_REG R0 + """) + self.check_exec("b", getline, ["a", "b", "c"], 2, reoptimize=False) + + def test_move_instr(self): + # LOAD_ATTR_REG must not be moved outside the try block + # LOAD_GLOBAL_REG must not be moved outside the except block + converter = self.optimize_func(move_instr) + self.check_bytecode(converter, """ +LOAD_CONST_REG 'x', 'x' (const#1) +SETUP_EXCEPT +LOAD_ATTR_REG R0, 'x', 'unknown_attr' (name#0) +LOAD_CONST_REG R1, 1 (const#2) +BINARY_ADD_REG 'y', R0, R1 +CLEAR_REG R0 +CLEAR_REG R1 +POP_BLOCK +JUMP_FORWARD +POP_REG R2 +PUSH_REG R2 +LOAD_GLOBAL_REG R3, 'AttributeError' (name#1) +COMPARE_REG R2, 'exception match', R2, R3 +CLEAR_REG R3 +JUMP_IF_FALSE_REG R2, 86 +POP_TOP +POP_TOP +POP_TOP +LOAD_CONST_REG R4, 1 (const#2) +PUSH_REG R4 +CLEAR_REG R4 +POP_REG 'y' +POP_EXCEPT +JUMP_FORWARD +CLEAR_REG R2 +END_FINALLY +LOAD_CONST_REG R5, None (const#0) +RETURN_VALUE_REG R5 + """) + self.check_exec(None, move_instr) + + def test_list_append(self): + converter, code = self.optimize_expr(""" +x = [] +for i in range(30): + x.append(i) + """) + self.check_bytecode(converter, """ +BUILD_LIST_REG R0, 0 +STORE_NAME_REG 'x' (name#0), R0 +CLEAR_REG R0 +SETUP_LOOP +LOAD_NAME_REG R1, 'range' (name#1) +LOAD_CONST_REG R2, 30 (const#0) +CALL_FUNCTION_REG R1, R1, (1 positional), R2 +CLEAR_REG R2 +GET_ITER_REG R1, R1 +FOR_ITER_REG R3, R1, +STORE_NAME_REG 'i' (name#2), R3 +LOAD_NAME_REG R4, 'x' (name#0) +LOAD_ATTR_REG R4, R4, 'append' (name#3) +CALL_PROCEDURE_REG R4, (1 positional), R3 +CLEAR_REG R4 +CLEAR_REG R3 +JUMP_ABSOLUTE 43 +CLEAR_REG R1 +POP_BLOCK +LOAD_CONST_REG R5, None (const#1) +RETURN_VALUE_REG R5 + """) + self.check_expr('x', list(range(30)), code) + + def test_store_load_name(self): + converter, code = self.optimize_expr(""" +def f(): + pass +f() + """) + self.check_bytecode(converter, """ +LOAD_CONST_REG R0, (const#0) +LOAD_CONST_REG R1, 'f' (const#1) +MAKE_FUNCTION_REG R1, R1, R0, 0, 0, 0 +STORE_NAME_REG 'f' (name#0), R1 +CALL_PROCEDURE_REG R1, (0 positional) +CLEAR_REG R1 +LOAD_CONST_REG R0, None (const#2) +RETURN_VALUE_REG R0 + """) + self.exec_code(code, reoptimize=False) + + # FIXME: LIST_APPEND and stack tracker + @unittest.skipIf(True, "FIXME") + def test_gen_list_append(self): + converter, code = self.optimize_expr(""" +x = [i for i in range(30)] + """) + self.check_bytecode(converter, """ +LOAD_CONST_REG R0, at (...)> (const#0) +LOAD_CONST_REG R1, '' (const#1) +MAKE_FUNCTION_REG R1, R1, R0, 0, 0, 0 +LOAD_NAME_REG R2, 'range' (name#0) +LOAD_CONST_REG R0, 30 (const#2) +CALL_FUNCTION_REG R2, R2, (1 positional), R0 +CLEAR_REG R0 +GET_ITER_REG R2, R2 +CALL_FUNCTION_REG R1, R1, (1 positional), R2 +CLEAR_REG R2 +PUSH_REG R1 +CLEAR_REG R1 +STORE_NAME 'x' (name#1) +LOAD_CONST None (const#3) +RETURN_VALUE + """) + + converter2, code2 = self.optimize_code(code.co_consts[0]) + self.check_bytecode(converter2, """ +BUILD_LIST_REG R0, 0 +PUSH_REG R0 +CLEAR_REG R0 +FOR_ITER_REG 'i', '.0', +LIST_APPEND_REG R0, 'i' +JUMP_ABSOLUTE 11 +RETURN_VALUE + """) + code = registervm.patch_code_obj(code, const={0: code2}) + + self.check_expr('x', list(range(30)), code) + + def test_return_none(self): + converter = self.optimize_func(noop) + self.check_bytecode(converter, """ +LOAD_CONST_REG R0, None (const#0) +RETURN_VALUE_REG R0 + """) + self.check_exec(None, noop) + + def test_loop_sum(self): + converter = self.optimize_func(loop_sum) + self.check_bytecode(converter, """ +LOAD_CONST_REG 'total', 0 (const#1) +SETUP_LOOP +LOAD_GLOBAL_REG R0, 'range' (name#0) +CALL_FUNCTION_REG R0, R0, (1 positional), 'n' +GET_ITER_REG R0, R0 +FOR_ITER_REG 'i', R0, +INPLACE_ADD_REG 'total', 'i' +JUMP_ABSOLUTE 27 +CLEAR_REG R0 +POP_BLOCK +RETURN_VALUE_REG 'total' + """) + self.check_exec(4950, loop_sum, 100) + + def test_factorial_loop(self): + converter = self.optimize_func(fact_loop) + self.check_bytecode(converter, """ +LOAD_CONST_REG 'f', 1 (const#1) +SETUP_LOOP +LOAD_GLOBAL_REG R0, 'range' (name#0) +LOAD_CONST_REG R1, 2 (const#2) +LOAD_CONST_REG R2, 1 (const#1) +BINARY_ADD_REG R2, 'n', R2 +CALL_FUNCTION_REG R0, R0, (2 positional), R1, R2 +CLEAR_REG R1 +CLEAR_REG R2 +GET_ITER_REG R0, R0 +FOR_ITER_REG 'i', R0, +INPLACE_MULTIPLY_REG 'f', 'i' +JUMP_ABSOLUTE 52 +CLEAR_REG R0 +POP_BLOCK +RETURN_VALUE_REG 'f' + """) + self.check_exec(3628800, fact_loop, 10, reoptimize=False) + + def test_factorial(self): + converter = self.optimize_func(fact) + self.check_bytecode(converter, """ +LOAD_CONST_REG R0, 2 (const#1) +COMPARE_REG R0, '<', 'n', R0 +JUMP_IF_FALSE_REG R0, 27 +LOAD_CONST_REG R1, 1 (const#2) +RETURN_VALUE_REG R1 +LOAD_GLOBAL_REG R0, 'fact' (name#0) +LOAD_CONST_REG R2, 1 (const#2) +BINARY_SUBTRACT_REG R2, 'n', R2 +CALL_FUNCTION_REG R0, R0, (1 positional), R2 +CLEAR_REG R2 +BINARY_MULTIPLY_REG R0, 'n', R0 +RETURN_VALUE_REG R0 + """) + self.check_exec(3628800, fact, 10) + + def test_pow2(self): + converter = self.optimize_func(pow2) + self.check_bytecode(converter, """ +LOAD_CONST_REG 'x', 1 (const#1) +LOAD_CONST_REG R0, 2 (const#2) +SETUP_LOOP +LOAD_GLOBAL_REG R1, 'range' (name#0) +CALL_FUNCTION_REG R1, R1, (1 positional), 'n' +GET_ITER_REG R1, R1 +FOR_ITER_REG 'i', R1, +INPLACE_MULTIPLY_REG 'x', R0 +JUMP_ABSOLUTE 32 +CLEAR_REG R0 +CLEAR_REG R1 +POP_BLOCK +RETURN_VALUE_REG 'x' + """) + self.check_exec(4294967296, pow2, 32, reoptimize=False) + + def test_sieve(self): + converter = self.optimize_func(sieve) + self.check_bytecode(converter, """ +LOAD_GLOBAL_REG R0, 'list' (name#0) +LOAD_GLOBAL_REG R1, 'range' (name#1) +LOAD_CONST_REG 'j', 2 (const#1) +LOAD_CONST_REG R2, 1 (const#2) +LOAD_CONST_REG R3, -1 (const#3) +BINARY_ADD_REG R4, 'n', R2 +CALL_FUNCTION_REG R1, R1, (2 positional), 'j', R4 +CLEAR_REG R4 +CALL_FUNCTION_REG 'primes', R0, (1 positional), R1 +CLEAR_REG R0 +CLEAR_REG R1 +SETUP_LOOP +GET_ITER_REG R5, 'primes' +FOR_ITER_REG 'i', R5, +SETUP_LOOP +BINARY_MULTIPLY_REG R6, 'i', 'j' +BINARY_SUBSCR_REG R3, 'primes', R3 +COMPARE_REG R6, '<=', R6, R3 +JUMP_IF_FALSE_REG R6, 166 +BINARY_MULTIPLY_REG R7, 'i', 'j' +COMPARE_REG R7, 'in', R7, 'primes' +LOAD_ATTR_REG R8, 'primes', 'remove' (name#2) +JUMP_IF_FALSE_REG R7, 155 +BINARY_MULTIPLY_REG R9, 'i', 'j' +CALL_PROCEDURE_REG R8, (1 positional), R9 +CLEAR_REG R8 +CLEAR_REG R9 +CLEAR_REG R7 +INPLACE_ADD_REG 'j', R2 +JUMP_ABSOLUTE 79 +CLEAR_REG R6 +POP_BLOCK +JUMP_ABSOLUTE 69 +CLEAR_REG R2 +CLEAR_REG R3 +CLEAR_REG R5 +POP_BLOCK +RETURN_VALUE_REG 'primes' + """) + self.check_exec([1, 1, 1], merge_load, reoptimize=False) + + def test_bisect(self): + converter = self.optimize_func(bisect) + self.check_bytecode(converter, """ +LOAD_CONST_REG 'lo', 0 (const#1) +LOAD_GLOBAL_REG R0, 'len' (name#0) +CALL_FUNCTION_REG 'hi', R0, (1 positional), 'a' +CLEAR_REG R0 +SETUP_LOOP +COMPARE_REG R1, '<', 'lo', 'hi' +JUMP_IF_FALSE_REG R1, 109 +CLEAR_REG R1 +BINARY_ADD_REG R2, 'lo', 'hi' +LOAD_CONST_REG R3, 2 (const#2) +BINARY_FLOOR_DIVIDE_REG 'hi', R2, R3 +CLEAR_REG R3 +BINARY_SUBSCR_REG R2, 'a', 'hi' +COMPARE_REG R2, '<', 'x', R2 +JUMP_IF_FALSE_REG R2, 88 +JUMP_ABSOLUTE 25 +CLEAR_REG R2 +LOAD_CONST_REG R4, 1 (const#3) +BINARY_ADD_REG 'lo', 'hi', R4 +CLEAR_REG R4 +JUMP_ABSOLUTE 25 +CLEAR_REG R1 +POP_BLOCK +RETURN_VALUE_REG 'lo' + """) + self.check_exec(2, bisect, tuple(range(100)), 1, reoptimize=False) + + def test_clear_reg(self): + converter = self.optimize_func(clear_reg) + self.check_bytecode(converter, """ +BUILD_TUPLE_REG 'lst', 0 +LOAD_CONST_REG R0, None (const#0) +BUILD_SLICE2_REG R0, R0, R0 +BINARY_SUBSCR_REG R0, 'lst', R0 +RETURN_VALUE_REG 'lst' + """) + self.check_exec((), clear_reg, 5, reoptimize=False) + + def test_qsort2(self): + converter = self.optimize_func(qsort2) + self.check_bytecode(converter, """ +JUMP_IF_TRUE_REG 'sequence', 13 +BUILD_LIST_REG R0, 0 +RETURN_VALUE_REG R0 +LOAD_CONST_REG R1, 0 (const#1) +BINARY_SUBSCR_REG 'pivot', 'sequence', R1 +LOAD_GLOBAL_REG R1, 'partition' (name#0) +LOAD_CONST_REG R2, 1 (const#2) +LOAD_CONST_REG R3, None (const#0) +BUILD_SLICE2_REG R2, R2, R3 +BINARY_SUBSCR_REG R2, 'sequence', R2 +BUILD_LIST_REG R3, 0 +BUILD_LIST_REG R4, 1, 'pivot' +BUILD_LIST_REG R5, 0 +CALL_FUNCTION_REG R1, R1, (4 positional), R2, R3, R4, R5 +CLEAR_REG R4 +CLEAR_REG R5 +UNPACK_SEQUENCE_REG R1, 3, 'lesser', 'equal', 'greater' +CLEAR_REG R1 +LOAD_GLOBAL_REG R2, 'qsort2' (name#1) +CALL_FUNCTION_REG R3, R2, (1 positional), 'lesser' +BINARY_ADD_REG R3, R3, 'equal' +CALL_FUNCTION_REG R2, R2, (1 positional), 'greater' +BINARY_ADD_REG R3, R3, R2 +RETURN_VALUE_REG R3 + """) + data = list(range(50)); import random; random.shuffle(data); data=tuple(data) + expected = list(range(50)) + self.check_exec(expected, qsort2, data, reoptimize=False) + + def test_make_func(self): + converter = self.optimize_func(make_func) + self.check_bytecode(converter, """ +LOAD_CONST_REG R0, 'c' (const#1) +LOAD_CONST_REG R1, 5 (const#2) +LOAD_CONST_REG R2, (const#3) +LOAD_CONST_REG R3, 'make_func..f' (const#4) +MAKE_FUNCTION_REG 'f', R3, R2, 0, 1, R1, R0, 0 +CLEAR_REG R3 +LOAD_CONST_REG R0, 'a' (const#5) +LOAD_CONST_REG R1, 'b1' (const#6) +LOAD_CONST_REG R2, 'b2' (const#7) +CALL_FUNCTION_REG R0, 'f', (3 positional), R0, R1, R2 +RETURN_VALUE_REG R0 + """) + self.check_exec(("a", ("b1", "b2"), 5), make_func, reoptimize=False) + + def test_loop_compare_str(self): + converter = self.optimize_func(loop_compare_str) + self.check_bytecode(converter, """ +LOAD_CONST_REG 't', 't' (const#1) +LOAD_CONST_REG 's', 's' (const#2) +SETUP_LOOP +LOAD_GLOBAL_REG R0, 'range' (name#0) +LOAD_CONST_REG R1, 3 (const#3) +CALL_FUNCTION_REG R0, R0, (1 positional), R1 +CLEAR_REG R1 +GET_ITER_REG R0, R0 +FOR_ITER_REG 'loop', R0, +COMPARE_REG R2, '>=', 't', 's' +CLEAR_REG R2 +JUMP_ABSOLUTE 40 +CLEAR_REG R0 +POP_BLOCK +LOAD_CONST_REG R3, True (const#4) +RETURN_VALUE_REG R3 + """) + self.check_exec(True, loop_compare_str, reoptimize=False) + + def test_set_attr(self): + converter = self.optimize_func(set_attr) + self.check_bytecode(converter, """ +LOAD_ATTR_REG R0, 'obj', 'attr1' (name#0) +LOAD_CONST_REG R1, 1 (const#1) +INPLACE_ADD_REG R0, R1 +STORE_ATTR_REG 'obj', 'attr1' (name#0), R0 +LOAD_CONST_REG R1, 3 (const#2) +STORE_ATTR_REG 'obj', 'attr2' (name#1), R1 +CLEAR_REG R1 +LOAD_CONST_REG R0, None (const#0) +RETURN_VALUE_REG R0 + """) + + class Dummy: + pass + + dummy = Dummy() + dummy.attr1 = 0 + dummy.attr2 = 0 + self.check_exec(None, set_attr, dummy, reoptimize=False) + + def test_get_loader(self): + converter = self.optimize_func(get_loader) + # "LOAD_GLOBAL_REG R3, 'find_loader'" must not be moved in a + # conditional branch, but before the if + self.check_bytecode(converter, """ +JUMP_IF_FALSE_REG 'module_or_name', 52 +LOAD_GLOBAL_REG R0, 'getattr' (name#0) +LOAD_CONST_REG R1, '__loader__' (const#1) +LOAD_CONST_REG R2, None (const#0) +CALL_FUNCTION_REG 'loader', R0, (3 positional), 'module_or_name', R1, R2 +CLEAR_REG R0 +CLEAR_REG R1 +CLEAR_REG R2 +LOAD_ATTR_REG 'fullname', 'module_or_name', '__name__' (name#2) +JUMP_FORWARD +MOVE_REG 'fullname', 'module_or_name' +LOAD_GLOBAL_REG R3, 'find_loader' (name#3) +CALL_FUNCTION_REG R3, R3, (1 positional), 'fullname' +RETURN_VALUE_REG R3 + """) + self.can_reoptimize(get_loader) + + def test_find_loader(self): + converter = self.optimize_func(find_loader) + self.check_bytecode(converter, """ +SETUP_EXCEPT +LOAD_GLOBAL_REG R0, 'sys' (name#0) +LOAD_ATTR_REG R0, R0, 'modules' (name#1) +BINARY_SUBSCR_REG R0, R0, 'name' +LOAD_ATTR_REG 'loader', R0, '__loader__' (name#2) +LOAD_CONST_REG R0, None (const#0) +COMPARE_REG R0, 'is', 'loader', R0 +JUMP_IF_FALSE_REG R0, 95 +LOAD_GLOBAL_REG R1, 'ValueError' (name#4) +LOAD_CONST_REG R2, '{}.__loader__ is None' (const#1) +LOAD_ATTR_REG R2, R2, 'format' (name#5) +CALL_FUNCTION_REG R2, R2, (1 positional), 'name' +CALL_FUNCTION_REG R1, R1, (1 positional), R2 +CLEAR_REG R2 +PUSH_REG R1 +CLEAR_REG R1 +RAISE_VARARGS 1 +RETURN_VALUE_REG 'loader' +POP_REG R3 +PUSH_REG R3 +LOAD_GLOBAL_REG R4, 'KeyError' (name#6) +COMPARE_REG R3, 'exception match', R3, R4 +CLEAR_REG R4 +JUMP_IF_FALSE_REG R3, 133 +POP_TOP +POP_TOP +POP_TOP +POP_EXCEPT +JUMP_FORWARD +CLEAR_REG R3 +END_FINALLY +LOAD_GLOBAL_REG R5, '_bootstrap' (name#7) +LOAD_ATTR_REG R5, R5, '_find_module' (name#8) +CALL_FUNCTION_REG R5, R5, (2 positional), 'name', 'path' +RETURN_VALUE_REG R5 + """) + self.can_reoptimize(find_loader) + + + def test_store_load_global(self): + converter = self.optimize_func(store_load_global) + self.check_bytecode(converter, """ +LOAD_CONST_REG R0, 5 (const#1) +STORE_GLOBAL_REG 'global_var' (name#0), R0 +RETURN_VALUE_REG R0 + """) + self.check_exec(5, store_load_global) + + +def test_main(): + support.run_unittest( + OptimizeTests, + ) + +if __name__ == "__main__": + test_main() diff -r fb80df16c4ff -r 6a9ca0177020 Objects/frameobject.c --- a/Objects/frameobject.c Tue Oct 23 21:14:34 2012 +0300 +++ b/Objects/frameobject.c Tue May 20 11:04:02 2014 +0200 @@ -424,6 +424,7 @@ { PyObject **p, **valuestack; PyCodeObject *co; + Py_ssize_t i; PyObject_GC_UnTrack(f); Py_TRASHCAN_SAFE_BEGIN(f) @@ -438,6 +439,9 @@ Py_XDECREF(*p); } + for (i=0; if_registers[i]); + Py_XDECREF(f->f_back); Py_DECREF(f->f_builtins); Py_DECREF(f->f_globals); @@ -484,6 +488,9 @@ for (i = slots; --i >= 0; ++fastlocals) Py_VISIT(*fastlocals); + for (i=0; if_registers[i]); + /* stack */ if (f->f_stacktop != NULL) { for (p = f->f_valuestack; p < f->f_stacktop; p++) @@ -517,6 +524,9 @@ for (i = slots; --i >= 0; ++fastlocals) Py_CLEAR(*fastlocals); + for (i=0; if_registers[i]); + /* stack */ if (oldtop != NULL) { for (p = f->f_valuestack; p < oldtop; p++) @@ -648,7 +658,7 @@ ncells = PyTuple_GET_SIZE(code->co_cellvars); nfrees = PyTuple_GET_SIZE(code->co_freevars); extras = code->co_stacksize + code->co_nlocals + ncells + - nfrees; + nfrees + FRAME_NREGISTER; if (free_list == NULL) { f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, extras); @@ -682,6 +692,10 @@ f->f_locals = NULL; f->f_trace = NULL; f->f_exc_type = f->f_exc_value = f->f_exc_traceback = NULL; + f->f_registers = &f->f_localsplus[code->co_stacksize + code->co_nlocals + ncells + nfrees]; + for (i=0; if_registers[i] = NULL; + } f->f_stacktop = f->f_valuestack; f->f_builtins = builtins; diff -r fb80df16c4ff -r 6a9ca0177020 Python/ceval.c --- a/Python/ceval.c Tue Oct 23 21:14:34 2012 +0300 +++ b/Python/ceval.c Tue May 20 11:04:02 2014 +0200 @@ -106,16 +106,21 @@ /* Forward declarations */ #ifdef WITH_TSC static PyObject * call_function(PyObject ***, int, uint64*, uint64*); +static int call_function_reg(int, PyObject **, unsigned char **, uint64*, uint64*); #else static PyObject * call_function(PyObject ***, int); +static int call_function_reg(int, PyObject **, unsigned char **); #endif static PyObject * fast_function(PyObject *, PyObject ***, int, int, int); +static PyObject * fast_function_reg(PyObject *, PyObject *, PyObject **, unsigned char **, int, int, int); static PyObject * do_call(PyObject *, PyObject ***, int, int); +static PyObject * do_call_reg(PyObject *, PyObject **, unsigned char **, int, int); static PyObject * ext_do_call(PyObject *, PyObject ***, int, int, int); static PyObject * update_keyword_args(PyObject *, int, PyObject ***, PyObject *); static PyObject * update_star_args(int, int, PyObject *, PyObject ***); static PyObject * load_args(PyObject ***, int); +static PyObject * load_args_reg(int, PyObject **, unsigned char **); #define CALL_FLAG_VAR 1 #define CALL_FLAG_KW 2 @@ -754,6 +759,7 @@ static void restore_and_clear_exc_state(PyThreadState *, PyFrameObject *); static int do_raise(PyObject *, PyObject *); static int unpack_iterable(PyObject *, int, int, PyObject **); +static int unpack_iterable_reg(PyObject *, int, unsigned char **, PyObject **); /* Records whether tracing is on for any thread. Counts the number of threads for which tstate->c_tracefunc is non-NULL, so if the value @@ -795,7 +801,7 @@ register PyObject **stack_pointer; /* Next free slot in value stack */ register unsigned char *next_instr; register int opcode; /* Current opcode */ - register int oparg; /* Current opcode argument, if any */ + register int oparg, extendedarg; /* Current opcode argument, if any */ register enum why_code why; /* Reason for block stack unwind */ register PyObject **fastlocals, **freevars; PyObject *retval = NULL; /* Return value */ @@ -884,16 +890,13 @@ #define TARGET_WITH_IMPL(op, impl) \ TARGET_##op: \ opcode = op; \ - if (HAS_ARG(op)) \ - oparg = NEXTARG(); \ case op: \ goto impl; \ #define TARGET(op) \ TARGET_##op: \ + extendedarg = 0; \ opcode = op; \ - if (HAS_ARG(op)) \ - oparg = NEXTARG(); \ case op: @@ -989,11 +992,20 @@ #define INSTR_OFFSET() ((int)(next_instr - first_instr)) #define NEXTOP() (*next_instr++) -#define NEXTARG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2]) +#define NEXTARG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2] + extendedarg) +#define NEXTIMM8() (next_instr += 1, next_instr[-1]) + +/* FIXME: + extendedarg */ +#define NEXTARG_PTR(p_next_instr) (*(p_next_instr) += 2, ((*(p_next_instr))[-1]<<8) + (*(p_next_instr))[-2]) +#define NEXTIMM8_PTR(p_next_instr) (*(p_next_instr) += 1, (*(p_next_instr))[-1]) + +#define NEXTREG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2]) #define PEEKARG() ((next_instr[2]<<8) + next_instr[1]) #define JUMPTO(x) (next_instr = first_instr + (x)) #define JUMPBY(x) (next_instr += (x)) +#define NEXTREG_PTR(p_next_instr) (*(p_next_instr) += 2, ((*(p_next_instr))[-1]<<8) + (*(p_next_instr))[-2]) + /* OpCode prediction macros Some opcodes tend to come in pairs thus making it possible to predict the second code when the first is run. For example, @@ -1028,7 +1040,7 @@ #else #define PREDICT(op) if (*next_instr == op) goto PRED_##op #define PREDICTED(op) PRED_##op: next_instr++ -#define PREDICTED_WITH_ARG(op) PRED_##op: oparg = PEEKARG(); next_instr += 3 +#define PREDICTED_WITH_ARG(op) PREDICTED(op) #endif @@ -1074,6 +1086,7 @@ /* Local variable macros */ #define GETLOCAL(i) (fastlocals[i]) +#define GETREG(i) (f->f_registers[i]) /* The SETLOCAL() macro must not DECREF the local variable in-place and then store the new value; it must copy the old value to a temporary @@ -1300,12 +1313,11 @@ /* Extract opcode and argument */ + extendedarg = 0; opcode = NEXTOP(); - oparg = 0; /* allows oparg to be stored in a register because + dispatch_opcode: + oparg = -1; /* allows oparg to be stored in a register because it doesn't have to be remembered across a full loop */ - if (HAS_ARG(opcode)) - oparg = NEXTARG(); - dispatch_opcode: #ifdef DYNAMIC_EXECUTION_PROFILE #ifdef DXPAIRS dxpairs[lastopcode][opcode]++; @@ -1343,7 +1355,9 @@ FAST_DISPATCH(); TARGET(LOAD_FAST) { - PyObject *value = GETLOCAL(oparg); + PyObject *value; + oparg = NEXTARG(); + value = GETLOCAL(oparg); if (value == NULL) { format_exc_check_arg(PyExc_UnboundLocalError, UNBOUNDLOCAL_ERROR_MSG, @@ -1356,16 +1370,57 @@ } TARGET(LOAD_CONST) { - PyObject *value = GETITEM(consts, oparg); + PyObject *value; + oparg = NEXTARG(); + value = GETITEM(consts, oparg); Py_INCREF(value); PUSH(value); FAST_DISPATCH(); } + TARGET(LOAD_CONST_REG) { + PyObject *value; + int reg = NEXTREG(); + oparg = NEXTARG(); + value = GETITEM(consts, oparg); + Py_INCREF(value); + SETLOCAL(reg, value); + FAST_DISPATCH(); + } + PREDICTED_WITH_ARG(STORE_FAST); TARGET(STORE_FAST) { + PyObject *value; + oparg = NEXTARG(); + value = POP(); + SETLOCAL(oparg, value); + FAST_DISPATCH(); + } + + TARGET(POP_REG) { + int reg = NEXTREG(); PyObject *value = POP(); - SETLOCAL(oparg, value); + SETLOCAL(reg, value); + FAST_DISPATCH(); + } + + TARGET(MOVE_REG) { + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + PyObject *value; + value = GETLOCAL(reg1); + Py_INCREF(value); + SETLOCAL(reg0, value); + FAST_DISPATCH(); + } + + TARGET(PUSH_REG) { + int reg = NEXTREG(); + PyObject *value; + value = GETLOCAL(reg); + /* FIXME: check for NULL? */ + Py_INCREF(value); + PUSH(value); FAST_DISPATCH(); } @@ -1375,6 +1430,13 @@ FAST_DISPATCH(); } + TARGET(CLEAR_REG) { + int reg; + reg = NEXTREG(); + SETLOCAL(reg, NULL); + FAST_DISPATCH(); + } + TARGET(ROT_TWO) { PyObject *top = TOP(); PyObject *second = SECOND(); @@ -1421,6 +1483,17 @@ DISPATCH(); } + TARGET(UNARY_POSITIVE_REG) { + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + PyObject *value = GETLOCAL(reg1); + PyObject *res = PyNumber_Positive(value); + if (res == NULL) + goto error; + SETLOCAL(reg0, res); + DISPATCH(); + } + TARGET(UNARY_NEGATIVE) { PyObject *value = TOP(); PyObject *res = PyNumber_Negative(value); @@ -1431,6 +1504,17 @@ DISPATCH(); } + TARGET(UNARY_NEGATIVE_REG) { + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + PyObject *value = GETLOCAL(reg1); + PyObject *res = PyNumber_Negative(value); + if (res == NULL) + goto error; + SETLOCAL(reg0, res); + DISPATCH(); + } + TARGET(UNARY_NOT) { PyObject *value = TOP(); int err = PyObject_IsTrue(value); @@ -1450,6 +1534,27 @@ goto error; } + TARGET(UNARY_NOT_REG) { + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + PyObject *value; + int err; + + value = GETLOCAL(reg1); + err = PyObject_IsTrue(value); + if (err == 0) { + Py_INCREF(Py_True); + SETLOCAL(reg0, Py_True); + DISPATCH(); + } + else if (err > 0) { + Py_INCREF(Py_False); + SETLOCAL(reg0, Py_False); + DISPATCH(); + } + goto error; + } + TARGET(UNARY_INVERT) { PyObject *value = TOP(); PyObject *res = PyNumber_Invert(value); @@ -1484,6 +1589,33 @@ DISPATCH(); } + TARGET(BINARY_MULTIPLY_REG) { + PyObject *product; + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + PyObject *left = GETLOCAL(reg1); + PyObject *right = GETLOCAL(reg2); + product = PyNumber_Multiply(left, right); + if (product == NULL) + goto error; + SETLOCAL(reg0, product); + DISPATCH(); + } + + TARGET(INPLACE_MULTIPLY_REG) { + PyObject *product; + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + PyObject *left = GETLOCAL(reg1); + PyObject *right = GETLOCAL(reg2); + product = PyNumber_InPlaceMultiply(left, right); + if (product == NULL) + goto error; + SETLOCAL(reg1, product); + DISPATCH(); + } + TARGET(BINARY_TRUE_DIVIDE) { PyObject *divisor = POP(); PyObject *dividend = TOP(); @@ -1496,6 +1628,22 @@ DISPATCH(); } + TARGET(BINARY_TRUE_DIVIDE_REG) { + int reg0, reg1, reg2; + PyObject *divisor, *dividend, *quotient; + + reg0 = NEXTREG(); + reg1 = NEXTREG(); + reg2 = NEXTREG(); + divisor = GETLOCAL(reg1); + dividend = GETLOCAL(reg2); + quotient = PyNumber_TrueDivide(dividend, divisor); + if (quotient == NULL) + goto error; + SETLOCAL(reg0, quotient); + DISPATCH(); + } + TARGET(BINARY_FLOOR_DIVIDE) { PyObject *divisor = POP(); PyObject *dividend = TOP(); @@ -1508,6 +1656,33 @@ DISPATCH(); } + TARGET(BINARY_FLOOR_DIVIDE_REG) { + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + PyObject *divisor = GETLOCAL(reg2); + PyObject *dividend = GETLOCAL(reg1); + PyObject *quotient; + quotient = PyNumber_FloorDivide(dividend, divisor); + if (quotient == NULL) + goto error; + SETLOCAL(reg0, quotient); + DISPATCH(); + } + + TARGET(INPLACE_FLOOR_DIVIDE_REG) { + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + PyObject *dividend = GETLOCAL(reg1); + PyObject *divisor = GETLOCAL(reg2); + PyObject *quotient; + quotient = PyNumber_InPlaceFloorDivide(dividend, divisor); + if (quotient == NULL) + goto error; + SETLOCAL(reg1, quotient); + DISPATCH(); + } + TARGET(BINARY_MODULO) { PyObject *divisor = POP(); PyObject *dividend = TOP(); @@ -1522,6 +1697,25 @@ DISPATCH(); } + TARGET(BINARY_MODULO_REG) { + int reg_result, reg_divisor, reg_dividend; + PyObject *divisor, *dividend, *res; + + reg_result = NEXTREG(); + reg_dividend = NEXTREG(); + reg_divisor = NEXTREG(); + dividend = GETLOCAL(reg_dividend); + divisor = GETLOCAL(reg_divisor); + if (PyUnicode_CheckExact(dividend)) + res = PyUnicode_Format(dividend, divisor); + else + res = PyNumber_Remainder(dividend, divisor); + if (res == NULL) + goto error; + SETLOCAL(reg_result, res); + DISPATCH(); + } + TARGET(BINARY_ADD) { PyObject *right = POP(); PyObject *left = TOP(); @@ -1542,6 +1736,58 @@ DISPATCH(); } + TARGET(BINARY_ADD_REG) { + PyObject *sum; + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + PyObject *left = GETLOCAL(reg1); + PyObject *right = GETLOCAL(reg2); + + /* FIXME: enable unicode_concatenate() optimization, + refleak issue on left? */ + #if 0 + if (PyUnicode_CheckExact(left) && + PyUnicode_CheckExact(right)) { + sum = unicode_concatenate(left, right, f, next_instr); + /* unicode_concatenate consumed the ref to v */ + } + else + #endif + { + sum = PyNumber_Add(left, right); + } + if (sum == NULL) + goto error; + SETLOCAL(reg0, sum); + DISPATCH(); + } + + TARGET(INPLACE_ADD_REG) { + PyObject *sum; + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + PyObject *left = GETLOCAL(reg1); + PyObject *right = GETLOCAL(reg2); + + /* FIXME: enable this optimization */ + #if 0 + if (PyUnicode_CheckExact(left) && PyUnicode_CheckExact(right)) { + Py_INCREF(left); + sum = unicode_concatenate(left, right, f, next_instr); + /* unicode_concatenate consumed the ref to v */ + } + else + #endif + { + sum = PyNumber_InPlaceAdd(left, right); + } + if (sum == NULL) + goto error; + SETLOCAL(reg1, sum); + DISPATCH(); + } + TARGET(BINARY_SUBTRACT) { PyObject *right = POP(); PyObject *left = TOP(); @@ -1554,6 +1800,33 @@ DISPATCH(); } + TARGET(BINARY_SUBTRACT_REG) { + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + PyObject *left = GETLOCAL(reg1); + PyObject *right = GETLOCAL(reg2); + PyObject *diff; + diff = PyNumber_Subtract(left, right); + if (diff == NULL) + goto error; + SETLOCAL(reg0, diff); + DISPATCH(); + } + + TARGET(INPLACE_SUBTRACT_REG) { + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + PyObject *left = GETLOCAL(reg1); + PyObject *right = GETLOCAL(reg2); + PyObject *diff; + diff = PyNumber_InPlaceSubtract(left, right); + if (diff == NULL) + goto error; + SETLOCAL(reg1, diff); + DISPATCH(); + } + TARGET(BINARY_SUBSCR) { PyObject *sub = POP(); PyObject *container = TOP(); @@ -1566,6 +1839,19 @@ DISPATCH(); } + TARGET(BINARY_SUBSCR_REG) { + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + PyObject *sub = GETLOCAL(reg2); + PyObject *container = GETLOCAL(reg1); + PyObject *res = PyObject_GetItem(container, sub); + if (res == NULL) + goto error; + SETLOCAL(reg0, res); + DISPATCH(); + } + TARGET(BINARY_LSHIFT) { PyObject *right = POP(); PyObject *left = TOP(); @@ -1627,8 +1913,10 @@ } TARGET(LIST_APPEND) { - PyObject *v = POP(); - PyObject *list = PEEK(oparg); + PyObject *v, *list; + oparg = NEXTARG(); + v = POP(); + list = PEEK(oparg); int err; err = PyList_Append(list, v); Py_DECREF(v); @@ -1638,9 +1926,27 @@ DISPATCH(); } + TARGET(LIST_APPEND_REG) { + int reg_list, reg_v; + PyObject *list, *v; + int err; + + reg_list = NEXTREG(); + reg_v = NEXTREG(); + list = GETLOCAL(reg_list); + v = GETLOCAL(reg_v); + err = PyList_Append(list, v); + if (err != 0) + goto error; + PREDICT(JUMP_ABSOLUTE); + DISPATCH(); + } + TARGET(SET_ADD) { - PyObject *v = POP(); - PyObject *set = stack_pointer[-oparg]; + PyObject *v, *set; + oparg = NEXTARG(); + v = POP(); + set = stack_pointer[-oparg]; int err; err = PySet_Add(set, v); Py_DECREF(v); @@ -1686,6 +1992,22 @@ DISPATCH(); } + TARGET(INPLACE_TRUE_DIVIDE_REG) { + int reg1, reg2; + PyObject *divisor, *dividend, *quotient; + + reg1 = NEXTREG(); + reg2 = NEXTREG(); + + dividend = GETLOCAL(reg1); + divisor = GETLOCAL(reg2); + quotient = PyNumber_InPlaceTrueDivide(dividend, divisor); + if (quotient == NULL) + goto error; + SETLOCAL(reg1, quotient); + DISPATCH(); + } + TARGET(INPLACE_FLOOR_DIVIDE) { PyObject *divisor = POP(); PyObject *dividend = TOP(); @@ -1710,6 +2032,20 @@ DISPATCH(); } + TARGET(INPLACE_MODULO_REG) { + int reg1, reg2; + PyObject *right, *left, *mod; + reg1 = NEXTREG(); + reg2 = NEXTREG(); + left = GETLOCAL(reg1); + right = GETLOCAL(reg2); + mod = PyNumber_InPlaceRemainder(left, right); + if (mod == NULL) + goto error; + SETLOCAL(reg1, mod); + DISPATCH(); + } + TARGET(INPLACE_ADD) { PyObject *right = POP(); PyObject *left = TOP(); @@ -1817,6 +2153,26 @@ DISPATCH(); } + TARGET(STORE_SUBSCR_REG) { + int reg0, reg1, reg2; + PyObject *sub, *container, *v; + int err; + + reg0 = NEXTREG(); + reg1 = NEXTREG(); + reg2 = NEXTREG(); + + container = GETLOCAL(reg0); + sub = GETLOCAL(reg1); + v = GETLOCAL(reg2); + + /* v[w] = u */ + err = PyObject_SetItem(container, sub, v); + if (err != 0) + goto error; + DISPATCH(); + } + TARGET(DELETE_SUBSCR) { PyObject *sub = TOP(); PyObject *container = SECOND(); @@ -1831,6 +2187,22 @@ DISPATCH(); } + TARGET(DELETE_SUBSCR_REG) { + int reg_sub, reg_container; + PyObject *sub, *container; + int err; + + reg_container = NEXTREG(); + reg_sub = NEXTREG(); + container = GETLOCAL(reg_container); + sub = GETLOCAL(reg_sub); + /* del v[w] */ + err = PyObject_DelItem(container, sub); + if (err != 0) + goto error; + DISPATCH(); + } + TARGET(PRINT_EXPR) { PyObject *value = POP(); PyObject *hook = PySys_GetObject("displayhook"); @@ -1854,6 +2226,7 @@ #endif TARGET(RAISE_VARARGS) { PyObject *cause = NULL, *exc = NULL; + oparg = NEXTARG(); switch (oparg) { case 2: cause = POP(); /* cause */ @@ -1881,12 +2254,31 @@ DISPATCH(); } + TARGET(STORE_LOCALS_REG) { + int reg = NEXTREG(); + PyObject *locals, *old; + old = f->f_locals; + locals = GETLOCAL(reg); + Py_INCREF(locals); + Py_XDECREF(old); + f->f_locals = locals; + DISPATCH(); + } + TARGET(RETURN_VALUE) { retval = POP(); why = WHY_RETURN; goto fast_block_end; } + TARGET(RETURN_VALUE_REG) { + int reg = NEXTREG(); + retval = GETLOCAL(reg); + Py_INCREF(retval); + why = WHY_RETURN; + goto fast_block_end; + } + TARGET(YIELD_FROM) { PyObject *v = POP(); PyObject *reciever = TOP(); @@ -1925,6 +2317,17 @@ goto fast_yield; } + TARGET(YIELD_REG) { + int reg; + reg = NEXTREG(); + retval = GETLOCAL(reg); + Py_INCREF(retval); + f->f_stacktop = stack_pointer; + why = WHY_YIELD; + f->f_lasti = INSTR_OFFSET() - 1; + goto fast_yield; + } + TARGET(POP_EXCEPT) { PyTryBlock *b = PyFrame_BlockPop(f); if (b->b_type != EXCEPT_HANDLER) { @@ -2012,10 +2415,44 @@ DISPATCH(); } + TARGET(LOAD_BUILD_CLASS_REG) { + _Py_IDENTIFIER(__build_class__); + int reg; + + reg = NEXTREG(); + + PyObject *bc; + if (PyDict_CheckExact(f->f_builtins)) { + bc = _PyDict_GetItemId(f->f_builtins, &PyId___build_class__); + if (bc == NULL) { + PyErr_SetString(PyExc_NameError, + "__build_class__ not found"); + goto error; + } + Py_INCREF(bc); + } + else { + PyObject *build_class_str = _PyUnicode_FromId(&PyId___build_class__); + if (build_class_str == NULL) + break; + bc = PyObject_GetItem(f->f_builtins, build_class_str); + if (bc == NULL) { + if (PyErr_ExceptionMatches(PyExc_KeyError)) + PyErr_SetString(PyExc_NameError, + "__build_class__ not found"); + goto error; + } + } + SETLOCAL(reg, bc); + DISPATCH(); + } + TARGET(STORE_NAME) { - PyObject *name = GETITEM(names, oparg); - PyObject *v = POP(); - PyObject *ns = f->f_locals; + PyObject *name, *v, *ns; + oparg = NEXTARG(); + name = GETITEM(names, oparg); + v = POP(); + ns = f->f_locals; int err; if (ns == NULL) { PyErr_Format(PyExc_SystemError, @@ -2033,9 +2470,34 @@ DISPATCH(); } + TARGET(STORE_NAME_REG) { + int reg; + PyObject *name, *v, *ns; + oparg = NEXTARG(); + reg = NEXTREG(); + name = GETITEM(names, oparg); + v = GETLOCAL(reg); + ns = f->f_locals; + int err; + if (ns == NULL) { + PyErr_Format(PyExc_SystemError, + "no locals found when storing %R", name); + goto error; + } + if (PyDict_CheckExact(ns)) + err = PyDict_SetItem(ns, name, v); + else + err = PyObject_SetItem(ns, name, v); + if (err != 0) + goto error; + DISPATCH(); + } + TARGET(DELETE_NAME) { - PyObject *name = GETITEM(names, oparg); - PyObject *ns = f->f_locals; + PyObject *name, *ns; + oparg = NEXTARG(); + name = GETITEM(names, oparg); + ns = f->f_locals; int err; if (ns == NULL) { PyErr_Format(PyExc_SystemError, @@ -2054,7 +2516,9 @@ PREDICTED_WITH_ARG(UNPACK_SEQUENCE); TARGET(UNPACK_SEQUENCE) { - PyObject *seq = POP(), *item, **items; + PyObject *seq, *item, **items; + oparg = NEXTARG(); + seq = POP(); if (PyTuple_CheckExact(seq) && PyTuple_GET_SIZE(seq) == oparg) { items = ((PyTupleObject *)seq)->ob_item; @@ -2083,8 +2547,49 @@ DISPATCH(); } + PREDICTED(UNPACK_SEQUENCE_REG); + TARGET(UNPACK_SEQUENCE_REG) { + int nreg; + int regseq, reg, i; + PyObject *seq, *item, **items; + regseq = NEXTREG(); + seq = GETLOCAL(regseq); + nreg = NEXTARG(); + assert(nreg >= 1); + if (PyTuple_CheckExact(seq) && + PyTuple_GET_SIZE(seq) == nreg) { + items = ((PyTupleObject *)seq)->ob_item; + for (i=0; iob_item; + for (i=0; i> 8); + int totalargs; + oparg = NEXTARG(); + totalargs = 1 + (oparg & 0xFF) + (oparg >> 8); PyObject *seq = POP(); if (unpack_iterable(seq, oparg & 0xFF, oparg >> 8, @@ -2098,10 +2603,28 @@ DISPATCH(); } + TARGET(UNPACK_EX_REG) { + int regseq; + PyObject *seq; + unsigned char *next_instr_var; + int ok; + regseq = NEXTREG(); + seq = GETLOCAL(regseq); + + next_instr_var = next_instr; + ok = unpack_iterable_reg(seq, -1, &next_instr_var, fastlocals); + next_instr = next_instr_var; + if (!ok) + goto error; + DISPATCH(); + } + TARGET(STORE_ATTR) { - PyObject *name = GETITEM(names, oparg); - PyObject *owner = TOP(); - PyObject *v = SECOND(); + PyObject *name, *owner, *v; + oparg = NEXTARG(); + name = GETITEM(names, oparg); + owner = TOP(); + v = SECOND(); int err; STACKADJ(-2); err = PyObject_SetAttr(owner, name, v); @@ -2112,10 +2635,27 @@ DISPATCH(); } + TARGET(STORE_ATTR_REG) { + PyObject *name, *owner, *v; + int err, reg_owner, reg_value; + reg_owner = NEXTREG(); + oparg = NEXTARG(); + reg_value = NEXTREG(); + owner = GETLOCAL(reg_owner); + name = GETITEM(names, oparg); + v = GETLOCAL(reg_value); + err = PyObject_SetAttr(owner, name, v); + if (err != 0) + goto error; + DISPATCH(); + } + TARGET(DELETE_ATTR) { - PyObject *name = GETITEM(names, oparg); - PyObject *owner = POP(); + PyObject *name, *owner; int err; + oparg = NEXTARG(); + name = GETITEM(names, oparg); + owner = POP(); err = PyObject_SetAttr(owner, name, (PyObject *)NULL); Py_DECREF(owner); if (err != 0) @@ -2124,9 +2664,11 @@ } TARGET(STORE_GLOBAL) { - PyObject *name = GETITEM(names, oparg); - PyObject *v = POP(); + PyObject *name, *v; int err; + oparg = NEXTARG(); + name = GETITEM(names, oparg); + v = POP(); err = PyDict_SetItem(f->f_globals, name, v); Py_DECREF(v); if (err != 0) @@ -2134,9 +2676,25 @@ DISPATCH(); } + TARGET(STORE_GLOBAL_REG) { + PyObject *name, *v; + int reg; + int err; + oparg = NEXTARG(); + reg = NEXTREG(); + name = GETITEM(names, oparg); + v = GETLOCAL(reg); + err = PyDict_SetItem(f->f_globals, name, v); + if (err != 0) + goto error; + DISPATCH(); + } + TARGET(DELETE_GLOBAL) { - PyObject *name = GETITEM(names, oparg); + PyObject *name; int err; + oparg = NEXTARG(); + name = GETITEM(names, oparg); err = PyDict_DelItem(f->f_globals, name); if (err != 0) { format_exc_check_arg( @@ -2147,9 +2705,10 @@ } TARGET(LOAD_NAME) { - PyObject *name = GETITEM(names, oparg); - PyObject *locals = f->f_locals; - PyObject *v; + PyObject *name, *locals, *v; + oparg = NEXTARG(); + name = GETITEM(names, oparg); + locals = f->f_locals; if (locals == NULL) { PyErr_Format(PyExc_SystemError, "no locals when loading %R", name); @@ -2198,9 +2757,65 @@ DISPATCH(); } + TARGET(LOAD_NAME_REG) { + PyObject *name, *locals, *v; + int reg; + reg = NEXTREG(); + oparg = NEXTARG(); + name = GETITEM(names, oparg); + locals = f->f_locals; + if (locals == NULL) { + PyErr_Format(PyExc_SystemError, + "no locals when loading %R", name); + goto error; + } + if (PyDict_CheckExact(locals)) { + v = PyDict_GetItem(locals, name); + Py_XINCREF(v); + } + else { + v = PyObject_GetItem(locals, name); + if (v == NULL && PyErr_Occurred()) { + if (!PyErr_ExceptionMatches( + PyExc_KeyError)) + break; + PyErr_Clear(); + } + } + if (v == NULL) { + v = PyDict_GetItem(f->f_globals, name); + Py_XINCREF(v); + if (v == NULL) { + if (PyDict_CheckExact(f->f_builtins)) { + v = PyDict_GetItem(f->f_builtins, name); + if (v == NULL) { + format_exc_check_arg( + PyExc_NameError, + NAME_ERROR_MSG, name); + goto error; + } + Py_INCREF(v); + } + else { + v = PyObject_GetItem(f->f_builtins, name); + if (v == NULL) { + if (PyErr_ExceptionMatches(PyExc_KeyError)) + format_exc_check_arg( + PyExc_NameError, + NAME_ERROR_MSG, name); + goto error; + } + } + } + } + SETLOCAL(reg, v); + DISPATCH(); + } + TARGET(LOAD_GLOBAL) { - PyObject *name = GETITEM(names, oparg); - PyObject *v; + PyObject *name, *v; + oparg = NEXTARG(); + name = GETITEM(names, oparg); if (PyDict_CheckExact(f->f_globals) && PyDict_CheckExact(f->f_builtins)) { v = _PyDict_LoadGlobal((PyDictObject *)f->f_globals, @@ -2232,8 +2847,47 @@ DISPATCH(); } + TARGET(LOAD_GLOBAL_REG) { + PyObject *name; + PyObject *v; + int reg = NEXTREG(); + oparg = NEXTARG(); + name = GETITEM(names, oparg); + if (PyDict_CheckExact(f->f_globals) + && PyDict_CheckExact(f->f_builtins)) { + v = _PyDict_LoadGlobal((PyDictObject *)f->f_globals, + (PyDictObject *)f->f_builtins, + name); + if (v == NULL) { + if (!PyErr_Occurred()) + format_exc_check_arg(PyExc_NameError, + GLOBAL_NAME_ERROR_MSG, name); + goto error; + } + Py_INCREF(v); + } + else { + /* Slow-path if globals or builtins is not a dict */ + v = PyObject_GetItem(f->f_globals, name); + if (v == NULL) { + v = PyObject_GetItem(f->f_builtins, name); + if (v == NULL) { + if (PyErr_ExceptionMatches(PyExc_KeyError)) + format_exc_check_arg( + PyExc_NameError, + GLOBAL_NAME_ERROR_MSG, name); + goto error; + } + } + } + SETLOCAL(reg, v); + DISPATCH(); + } + TARGET(DELETE_FAST) { - PyObject *v = GETLOCAL(oparg); + PyObject *v; + oparg = NEXTARG(); + v = GETLOCAL(oparg); if (v != NULL) { SETLOCAL(oparg, NULL); DISPATCH(); @@ -2247,7 +2901,9 @@ } TARGET(DELETE_DEREF) { - PyObject *cell = freevars[oparg]; + PyObject *cell; + oparg = NEXTARG(); + cell = freevars[oparg]; if (PyCell_GET(cell) != NULL) { PyCell_Set(cell, NULL); DISPATCH(); @@ -2257,15 +2913,30 @@ } TARGET(LOAD_CLOSURE) { - PyObject *cell = freevars[oparg]; + PyObject *cell; + oparg = NEXTARG(); + cell = freevars[oparg]; Py_INCREF(cell); PUSH(cell); DISPATCH(); } + TARGET(LOAD_CLOSURE_REG) { + PyObject *cell; + int reg; + reg = NEXTREG(); + oparg = NEXTARG(); + cell = freevars[oparg]; + Py_INCREF(cell); + SETLOCAL(reg, cell); + DISPATCH(); + } + TARGET(LOAD_DEREF) { - PyObject *cell = freevars[oparg]; - PyObject *value = PyCell_GET(cell); + PyObject *cell, *value; + oparg = NEXTARG(); + cell = freevars[oparg]; + value = PyCell_GET(cell); if (value == NULL) { format_exc_unbound(co, oparg); goto error; @@ -2275,16 +2946,47 @@ DISPATCH(); } + TARGET(LOAD_DEREF_REG) { + PyObject *cell, *value; + int reg; + reg = NEXTREG(); + oparg = NEXTARG(); + cell = freevars[oparg]; + value = PyCell_GET(cell); + if (value == NULL) { + format_exc_unbound(co, oparg); + goto error; + } + Py_INCREF(value); + SETLOCAL(reg, value); + DISPATCH(); + } + TARGET(STORE_DEREF) { - PyObject *v = POP(); - PyObject *cell = freevars[oparg]; + PyObject *v, *cell; + oparg = NEXTARG(); + v = POP(); + cell = freevars[oparg]; PyCell_Set(cell, v); Py_DECREF(v); DISPATCH(); } + TARGET(STORE_DEREF_REG) { + PyObject *v, *cell; + int reg; + reg = NEXTREG(); + oparg = NEXTARG(); + v = GETLOCAL(reg); + cell = freevars[oparg]; + PyCell_Set(cell, v); + DISPATCH(); + } + TARGET(BUILD_TUPLE) { - PyObject *tup = PyTuple_New(oparg); + PyObject *tup; + oparg = NEXTARG(); + tup = PyTuple_New(oparg); if (tup == NULL) goto error; while (--oparg >= 0) { @@ -2295,8 +2997,30 @@ DISPATCH(); } + TARGET(BUILD_TUPLE_REG) { + int i, nreg; + int regres; + PyObject *tup; + + regres = NEXTREG(); + nreg = NEXTARG(); + tup = PyTuple_New(nreg); + if (tup == NULL) + goto error; + for (i=0; i= 0) { @@ -2307,9 +3031,31 @@ DISPATCH(); } + TARGET(BUILD_LIST_REG) { + PyObject *list, *item; + int i, nreg, regres, reg; + regres = NEXTREG(); + nreg = NEXTARG(); + list = PyList_New(nreg); + if (list == NULL) + goto error; + for (i=0; i= 0) { @@ -2327,13 +3073,27 @@ } TARGET(BUILD_MAP) { - PyObject *map = _PyDict_NewPresized((Py_ssize_t)oparg); + PyObject *map; + oparg = NEXTARG(); + map = _PyDict_NewPresized((Py_ssize_t)oparg); if (map == NULL) goto error; PUSH(map); DISPATCH(); } + TARGET(BUILD_MAP_REG) { + PyObject *map; + int reg; + reg = NEXTREG(); + oparg = NEXTARG(); + map = _PyDict_NewPresized((Py_ssize_t)oparg); + if (map == NULL) + goto error; + SETLOCAL(reg, map); + DISPATCH(); + } + TARGET(STORE_MAP) { PyObject *key = TOP(); PyObject *value = SECOND(); @@ -2349,11 +3109,29 @@ DISPATCH(); } + TARGET(STORE_MAP_REG) { + int reg0, reg1, reg2; + PyObject *map, *key, *value; + int err; + reg0 = NEXTREG(); + reg1 = NEXTREG(); + reg2 = NEXTREG(); + map = GETLOCAL(reg0); + key = GETLOCAL(reg1); + value = GETLOCAL(reg2); + assert(PyDict_CheckExact(map)); + err = PyDict_SetItem(map, key, value); + if (err != 0) + goto error; + DISPATCH(); + } + TARGET(MAP_ADD) { - PyObject *key = TOP(); - PyObject *value = SECOND(); - PyObject *map; + PyObject *key, *value, *map; int err; + oparg = NEXTARG(); + key = TOP(); + value = SECOND(); STACKADJ(-2); map = stack_pointer[-oparg]; /* dict */ assert(PyDict_CheckExact(map)); @@ -2366,10 +3144,30 @@ DISPATCH(); } + TARGET(MAP_ADD_REG) { + int regm, regk, regv; + PyObject *key, *value, *map; + int err; + regm = NEXTREG(); + regk = NEXTREG(); + regv = NEXTREG(); + map = GETLOCAL(regm); + key = GETLOCAL(regk); + value = GETLOCAL(regv); + assert(PyDict_CheckExact(map)); + err = PyDict_SetItem(map, key, value); /* v[w] = u */ + if (err != 0) + goto error; + PREDICT(JUMP_ABSOLUTE); + DISPATCH(); + } + TARGET(LOAD_ATTR) { - PyObject *name = GETITEM(names, oparg); - PyObject *owner = TOP(); - PyObject *res = PyObject_GetAttr(owner, name); + PyObject *name, *owner, *res; + oparg = NEXTARG(); + name = GETITEM(names, oparg); + owner = TOP(); + res = PyObject_GetAttr(owner, name); Py_DECREF(owner); SET_TOP(res); if (res == NULL) @@ -2377,10 +3175,26 @@ DISPATCH(); } + TARGET(LOAD_ATTR_REG) { + PyObject *name, *owner, *res; + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + oparg = NEXTARG(); + owner = GETLOCAL(reg1); + name = GETITEM(names, oparg); + res = PyObject_GetAttr(owner, name); + if (res == NULL) + goto error; + SETLOCAL(reg0, res); + DISPATCH(); + } + TARGET(COMPARE_OP) { - PyObject *right = POP(); - PyObject *left = TOP(); - PyObject *res = cmp_outcome(oparg, left, right); + PyObject *right, *left, *res; + oparg = NEXTARG(); + right = POP(); + left = TOP(); + res = cmp_outcome(oparg, left, right); Py_DECREF(left); Py_DECREF(right); SET_TOP(res); @@ -2391,11 +3205,31 @@ DISPATCH(); } + TARGET(COMPARE_REG) { + int reg0, reg1, reg2; + PyObject *left, *right, *res; + + reg0 = NEXTREG(); + oparg = NEXTARG(); + reg1 = NEXTREG(); + reg2 = NEXTREG(); + left = GETLOCAL(reg1); + right = GETLOCAL(reg2); + res = cmp_outcome(oparg, left, right); + if (res == NULL) + goto error; + SETLOCAL(reg0, res); + PREDICT(JUMP_IF_FALSE_REG); + PREDICT(JUMP_IF_TRUE_REG); + DISPATCH(); + } + TARGET(IMPORT_NAME) { _Py_IDENTIFIER(__import__); - PyObject *name = GETITEM(names, oparg); - PyObject *func = _PyDict_GetItemId(f->f_builtins, &PyId___import__); - PyObject *from, *level, *args, *res; + PyObject *name, *func, *from, *level, *args, *res; + oparg = NEXTARG(); + name = GETITEM(names, oparg); + func = _PyDict_GetItemId(f->f_builtins, &PyId___import__); if (func == NULL) { PyErr_SetString(PyExc_ImportError, "__import__ not found"); @@ -2437,6 +3271,55 @@ DISPATCH(); } + TARGET(IMPORT_NAME_REG) { + _Py_IDENTIFIER(__import__); + int reg_result, reg; + PyObject *name, *func, *from, *level, *args, *res; + reg_result = NEXTREG(); + oparg = NEXTARG(); + name = GETITEM(names, oparg); + func = _PyDict_GetItemId(f->f_builtins, &PyId___import__); + if (func == NULL) { + PyErr_SetString(PyExc_ImportError, + "__import__ not found"); + goto error; + } + Py_INCREF(func); + reg = NEXTREG(); + from = GETLOCAL(reg); + reg = NEXTREG(); + level = GETLOCAL(reg); + if (PyLong_AsLong(level) != -1 || PyErr_Occurred()) + args = PyTuple_Pack(5, + name, + f->f_globals, + f->f_locals == NULL ? + Py_None : f->f_locals, + from, + level); + else + args = PyTuple_Pack(4, + name, + f->f_globals, + f->f_locals == NULL ? + Py_None : f->f_locals, + from); + if (args == NULL) { + Py_DECREF(func); + STACKADJ(-1); + goto error; + } + READ_TIMESTAMP(intr0); + res = PyEval_CallObject(func, args); + READ_TIMESTAMP(intr1); + Py_DECREF(args); + Py_DECREF(func); + if (res == NULL) + goto error; + SETLOCAL(reg_result, res); + DISPATCH(); + } + TARGET(IMPORT_STAR) { PyObject *from = POP(), *locals; int err; @@ -2457,10 +3340,33 @@ DISPATCH(); } + TARGET(IMPORT_STAR_REG) { + int reg_from; + PyObject *from, *locals; + int err; + reg_from = NEXTREG(); + from = GETLOCAL(reg_from); + PyFrame_FastToLocals(f); + locals = f->f_locals; + if (locals == NULL) { + PyErr_SetString(PyExc_SystemError, + "no locals found during 'import *'"); + goto error; + } + READ_TIMESTAMP(intr0); + err = import_all_from(locals, from); + READ_TIMESTAMP(intr1); + PyFrame_LocalsToFast(f, 0); + if (err != 0) + goto error; + DISPATCH(); + } + TARGET(IMPORT_FROM) { - PyObject *name = GETITEM(names, oparg); - PyObject *from = TOP(); - PyObject *res; + PyObject *name, *from, *res; + oparg = NEXTARG(); + name = GETITEM(names, oparg); + from = TOP(); READ_TIMESTAMP(intr0); res = import_from(from, name); READ_TIMESTAMP(intr1); @@ -2470,15 +3376,35 @@ DISPATCH(); } + TARGET(IMPORT_FROM_REG) { + int reg_result, reg_from; + PyObject *name, *from, *res; + reg_result = NEXTREG(); + reg_from = NEXTREG(); + oparg = NEXTARG(); + from = GETLOCAL(reg_from); + name = GETITEM(names, oparg); + READ_TIMESTAMP(intr0); + res = import_from(from, name); + READ_TIMESTAMP(intr1); + if (res == NULL) + goto error; + SETLOCAL(reg_result, res); + DISPATCH(); + } + TARGET(JUMP_FORWARD) { + oparg = NEXTARG(); JUMPBY(oparg); FAST_DISPATCH(); } PREDICTED_WITH_ARG(POP_JUMP_IF_FALSE); TARGET(POP_JUMP_IF_FALSE) { - PyObject *cond = POP(); + PyObject *cond; int err; + oparg = NEXTARG(); + cond = POP(); if (cond == Py_True) { Py_DECREF(cond); FAST_DISPATCH(); @@ -2499,10 +3425,36 @@ DISPATCH(); } + PREDICTED(JUMP_IF_FALSE_REG); + TARGET(JUMP_IF_FALSE_REG) { + PyObject *cond; + int err; + int reg = NEXTREG(); + oparg = NEXTARG(); + cond = GETLOCAL(reg); + if (cond == Py_True) { + FAST_DISPATCH(); + } + if (cond == Py_False) { + JUMPTO(oparg); + FAST_DISPATCH(); + } + err = PyObject_IsTrue(cond); + if (err > 0) + err = 0; + else if (err == 0) + JUMPTO(oparg); + else + goto error; + DISPATCH(); + } + PREDICTED_WITH_ARG(POP_JUMP_IF_TRUE); TARGET(POP_JUMP_IF_TRUE) { - PyObject *cond = POP(); + PyObject *cond; int err; + oparg = NEXTARG(); + cond = POP(); if (cond == Py_False) { Py_DECREF(cond); FAST_DISPATCH(); @@ -2525,9 +3477,36 @@ DISPATCH(); } + PREDICTED(JUMP_IF_TRUE_REG); + TARGET(JUMP_IF_TRUE_REG) { + int err; + int reg = NEXTREG(); + PyObject *cond = GETLOCAL(reg); + oparg = NEXTARG(); + if (cond == Py_False) { + FAST_DISPATCH(); + } + if (cond == Py_True) { + JUMPTO(oparg); + FAST_DISPATCH(); + } + err = PyObject_IsTrue(cond); + if (err > 0) { + err = 0; + JUMPTO(oparg); + } + else if (err == 0) + ; + else + goto error; + DISPATCH(); + } + TARGET(JUMP_IF_FALSE_OR_POP) { - PyObject *cond = TOP(); + PyObject *cond; int err; + oparg = NEXTARG(); + cond = TOP(); if (cond == Py_True) { STACKADJ(-1); Py_DECREF(cond); @@ -2551,8 +3530,10 @@ } TARGET(JUMP_IF_TRUE_OR_POP) { - PyObject *cond = TOP(); + PyObject *cond; int err; + oparg = NEXTARG(); + cond = TOP(); if (cond == Py_False) { STACKADJ(-1); Py_DECREF(cond); @@ -2578,6 +3559,7 @@ PREDICTED_WITH_ARG(JUMP_ABSOLUTE); TARGET(JUMP_ABSOLUTE) { + oparg = NEXTARG(); JUMPTO(oparg); #if FAST_LOOPS /* Enabling this path speeds-up all while and for-loops by bypassing @@ -2605,11 +3587,24 @@ DISPATCH(); } + TARGET(GET_ITER_REG) { + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + PyObject *iterable = GETLOCAL(reg2); + PyObject *iter = PyObject_GetIter(iterable); + if (iter == NULL) + goto error; + SETLOCAL(reg1, iter); + DISPATCH(); + } + PREDICTED_WITH_ARG(FOR_ITER); TARGET(FOR_ITER) { /* before: [iter]; after: [iter, iter()] *or* [] */ - PyObject *iter = TOP(); - PyObject *next = (*iter->ob_type->tp_iternext)(iter); + PyObject *iter, *next; + oparg = NEXTARG(); + iter = TOP(); + next = (*iter->ob_type->tp_iternext)(iter); if (next != NULL) { PUSH(next); PREDICT(STORE_FAST); @@ -2628,12 +3623,38 @@ DISPATCH(); } + TARGET(FOR_ITER_REG) { + /* before: [iter]; after: [iter, iter()] *or* [] */ + PyObject *iter, *next; + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + oparg = NEXTARG(); + + iter = GETLOCAL(reg1); + next = (*iter->ob_type->tp_iternext)(iter); + if (next != NULL) { + PREDICT(UNPACK_SEQUENCE_REG); + /* FIXME: predict something else? */ + SETLOCAL(reg0, next); + DISPATCH(); + } + if (PyErr_Occurred()) { + if (!PyErr_ExceptionMatches(PyExc_StopIteration)) + goto error; + PyErr_Clear(); + } + /* iterator ended normally */ + JUMPBY(oparg); + DISPATCH(); + } + TARGET(BREAK_LOOP) { why = WHY_BREAK; goto fast_block_end; } TARGET(CONTINUE_LOOP) { + oparg = NEXTARG(); retval = PyLong_FromLong(oparg); if (retval == NULL) goto error; @@ -2649,6 +3670,7 @@ are not try/except/finally handlers, you may need to update the PyGen_NeedsFinalizing() function. */ + oparg = NEXTARG(); PyFrame_BlockSetup(f, opcode, INSTR_OFFSET() + oparg, STACK_LEVEL()); @@ -2658,9 +3680,11 @@ TARGET(SETUP_WITH) { _Py_IDENTIFIER(__exit__); _Py_IDENTIFIER(__enter__); - PyObject *mgr = TOP(); - PyObject *exit = special_lookup(mgr, &PyId___exit__), *enter; - PyObject *res; + PyObject *mgr, *exit, *enter, *res; + + oparg = NEXTARG(); + mgr = TOP(); + exit = special_lookup(mgr, &PyId___exit__); if (exit == NULL) goto error; SET_TOP(exit); @@ -2778,6 +3802,7 @@ TARGET(CALL_FUNCTION) { PyObject **sp, *res; + oparg = NEXTARG(); PCALL(PCALL_ALL); sp = stack_pointer; #ifdef WITH_TSC @@ -2792,15 +3817,36 @@ DISPATCH(); } + TARGET_WITH_IMPL(CALL_PROCEDURE_REG, _call_function_reg) + TARGET(CALL_FUNCTION_REG) + _call_function_reg: { + int res; + unsigned char *next_instr_var; + PCALL(PCALL_ALL); + next_instr_var = next_instr; +#ifdef WITH_TSC + res = call_function_reg(opcode == CALL_PROCEDURE_REG, fastlocals, &next_instr_var, &intr0, &intr1); +#else + res = call_function_reg(opcode == CALL_PROCEDURE_REG, fastlocals, &next_instr_var); +#endif + next_instr = next_instr_var; + if (res == -1) + goto error; + DISPATCH(); + } + TARGET_WITH_IMPL(CALL_FUNCTION_VAR, _call_function_var_kw) TARGET_WITH_IMPL(CALL_FUNCTION_KW, _call_function_var_kw) TARGET(CALL_FUNCTION_VAR_KW) _call_function_var_kw: { - int na = oparg & 0xff; - int nk = (oparg>>8) & 0xff; - int flags = (opcode - CALL_FUNCTION) & 3; - int n = na + 2 * nk; + int na, nk, flags, n; PyObject **pfunc, *func, **sp, *res; + + oparg = NEXTARG(); + na = oparg & 0xff; + nk = (oparg>>8) & 0xff; + flags = (opcode - CALL_FUNCTION) & 3; + n = na + 2 * nk; PCALL(PCALL_ALL); if (flags & CALL_FLAG_VAR) n++; @@ -2841,13 +3887,16 @@ TARGET_WITH_IMPL(MAKE_CLOSURE, _make_function) TARGET(MAKE_FUNCTION) _make_function: { - int posdefaults = oparg & 0xff; - int kwdefaults = (oparg>>8) & 0xff; - int num_annotations = (oparg >> 16) & 0x7fff; - - PyObject *qualname = POP(); /* qualname */ - PyObject *code = POP(); /* code object */ - PyObject *func = PyFunction_NewWithQualName(code, f->f_globals, qualname); + int posdefaults, kwdefaults, num_annotations; + PyObject *qualname, *code, *func; + + oparg = NEXTARG(); + posdefaults = oparg & 0xff; + kwdefaults = (oparg>>8) & 0xff; + num_annotations = (oparg >> 16) & 0x7fff; + qualname = POP(); /* qualname */ + code = POP(); /* code object */ + func = PyFunction_NewWithQualName(code, f->f_globals, qualname); Py_DECREF(code); Py_DECREF(qualname); @@ -2950,8 +3999,138 @@ DISPATCH(); } + TARGET_WITH_IMPL(MAKE_CLOSURE_REG, _make_function_reg) + TARGET(MAKE_FUNCTION_REG) + _make_function_reg: { + int reg_result, reg_qualname, reg_code, reg; + int posdefaults, kwdefaults, num_annotations; + PyObject *qualname, *code, *func; + + reg_result = NEXTREG(); + reg_qualname = NEXTREG(); + reg_code = NEXTREG(); + + qualname = GETLOCAL(reg_qualname); /* qualname */ + code = GETLOCAL(reg_code); /* code object */ + func = PyFunction_NewWithQualName(code, f->f_globals, qualname); + + if (func == NULL) + goto error; + + if (opcode == MAKE_CLOSURE_REG) { + int reg_closure = NEXTREG(); + PyObject *closure = GETLOCAL(reg_closure); + if (PyFunction_SetClosure(func, closure) != 0) { + /* Can't happen unless bytecode is corrupt. */ + Py_DECREF(func); + goto error; + } + } + + posdefaults = NEXTIMM8(); + + /* XXX Maybe this should be a separate opcode? */ + if (posdefaults > 0) { + PyObject *defs = PyTuple_New(posdefaults); + if (defs == NULL) { + Py_DECREF(func); + goto error; + } + while (--posdefaults >= 0) { + int reg_def = NEXTREG(); + PyObject *def = GETLOCAL(reg_def); + Py_INCREF(def); + PyTuple_SET_ITEM(defs, posdefaults, def); + } + if (PyFunction_SetDefaults(func, defs) != 0) { + /* Can't happen unless + PyFunction_SetDefaults changes. */ + Py_DECREF(defs); + Py_DECREF(func); + goto error; + } + Py_DECREF(defs); + } + + kwdefaults = NEXTIMM8(); + if (kwdefaults > 0) { + PyObject *defs = PyDict_New(); + if (defs == NULL) { + Py_DECREF(func); + goto error; + } + while (--kwdefaults >= 0) { + int reg_v, reg_key; + PyObject *v, *key; + reg_v = NEXTREG(); + reg_key = NEXTREG(); + v = GETLOCAL(reg_v); /* default value */ + key = GETLOCAL(reg_key); /* kw only arg name */ + int err = PyDict_SetItem(defs, key, v); + if (err != 0) { + Py_DECREF(defs); + Py_DECREF(func); + goto error; + } + } + if (PyFunction_SetKwDefaults(func, defs) != 0) { + /* Can't happen unless + PyFunction_SetKwDefaults changes. */ + Py_DECREF(func); + Py_DECREF(defs); + goto error; + } + Py_DECREF(defs); + } + + num_annotations = NEXTIMM8(); + + if (num_annotations > 0) { + Py_ssize_t name_ix; + PyObject *names; /* names of args with annotations */ + PyObject *anns; + reg = NEXTREG(); + names = GETLOCAL(reg); + anns = PyDict_New(); + if (anns == NULL) { + Py_DECREF(func); + goto error; + } + name_ix = PyTuple_Size(names); + assert(num_annotations == name_ix+1); + while (name_ix > 0) { + PyObject *name, *value; + int err; + --name_ix; + name = PyTuple_GET_ITEM(names, name_ix); + + reg = NEXTREG(); + value = GETLOCAL(reg); + err = PyDict_SetItem(anns, name, value); + if (err != 0) { + Py_DECREF(anns); + Py_DECREF(func); + goto error; + } + } + + if (PyFunction_SetAnnotations(func, anns) != 0) { + /* Can't happen unless + PyFunction_SetAnnotations changes. */ + Py_DECREF(anns); + Py_DECREF(func); + goto error; + } + Py_DECREF(anns); + } + + SETLOCAL(reg_result, func); + DISPATCH(); + } + TARGET(BUILD_SLICE) { PyObject *start, *stop, *step, *slice; + oparg = NEXTARG(); if (oparg == 3) step = POP(); else @@ -2968,9 +4147,41 @@ DISPATCH(); } + TARGET(BUILD_SLICE2_REG) { + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + PyObject *start, *stop, *slice; + start = GETLOCAL(reg1); + stop = GETLOCAL(reg2); + slice = PySlice_New(start, stop, NULL); + if (slice == NULL) + goto error; + SETLOCAL(reg0, slice); + DISPATCH(); + } + + TARGET(BUILD_SLICE3_REG) { + int reg0 = NEXTREG(); + int reg1 = NEXTREG(); + int reg2 = NEXTREG(); + int reg3 = NEXTREG(); + PyObject *start, *stop, *step, *slice; + start = GETLOCAL(reg1); + stop = GETLOCAL(reg2); + step = GETLOCAL(reg3); + slice = PySlice_New(start, stop, step); + if (slice == NULL) + goto error; + SETLOCAL(reg0, slice); + DISPATCH(); + } + TARGET(EXTENDED_ARG) { + oparg = NEXTARG(); + extendedarg = oparg<<16; + opcode = NEXTOP(); - oparg = oparg<<16 | NEXTARG(); goto dispatch_opcode; } @@ -3773,6 +4984,97 @@ return 0; } +static int +unpack_iterable_reg(PyObject *v, int argcnt, + unsigned char **p_next_instr, PyObject **fastlocals) +{ + int i = 0, j = 0; + Py_ssize_t ll = 0; + PyObject *it; /* iter(v) */ + PyObject *w; + PyObject *l = NULL; /* variable list */ + int reg; + unsigned char **old_next_instr = p_next_instr; + int ext; + int argcntafter; + + assert(v != NULL); + + it = PyObject_GetIter(v); + if (it == NULL) + goto Error; + + ext = (argcnt == -1); + if (ext) + argcnt = NEXTIMM8_PTR(p_next_instr); + + for (; i < argcnt; i++) { + w = PyIter_Next(it); + if (w == NULL) { + /* Iterator done, via error or exhaustion. */ + if (!PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, + "need more than %d value%s to unpack", + i, i == 1 ? "" : "s"); + } + goto Error; + } + reg = NEXTREG_PTR(p_next_instr); + SETLOCAL(reg, w); + } + + if (!ext) { + /* We better have exhausted the iterator now. */ + w = PyIter_Next(it); + if (w == NULL) { + if (PyErr_Occurred()) + goto Error; + Py_DECREF(it); + return 1; + } + Py_DECREF(w); + PyErr_Format(PyExc_ValueError, "too many values to unpack " + "(expected %d)", argcnt); + goto Error; + } + + l = PySequence_List(it); + if (l == NULL) + goto Error; + reg = NEXTREG_PTR(p_next_instr); + SETLOCAL(reg, l); + i++; + + argcntafter = NEXTIMM8_PTR(p_next_instr); + + ll = PyList_GET_SIZE(l); + if (ll < argcntafter) { + PyErr_Format(PyExc_ValueError, "need more than %zd values to unpack", + argcnt + ll); + goto Error; + } + + /* Pop the "after-variable" args off the list. */ + for (j = argcntafter; j > 0; j--, i++) { + reg = NEXTREG_PTR(p_next_instr); + w = PyList_GET_ITEM(l, ll - j); + SETLOCAL(reg, w); + } + /* Resize the list. */ + Py_SIZE(l) = ll - argcntafter; + Py_DECREF(it); + return 1; + +Error: + for (; i > 0; i--) { + reg = NEXTREG_PTR(old_next_instr); + w = GETLOCAL(reg); + Py_DECREF(w); + } + Py_XDECREF(it); + return 0; +} + #ifdef LLTRACE static int @@ -4183,6 +5485,101 @@ return x; } +static int +call_function_reg(int is_procedure, PyObject **fastlocals, unsigned char **p_next_instr +#ifdef WITH_TSC + , uint64* pintr0, uint64* pintr1 +#endif + ) +{ + int regres, regfunc, nreg; + int na, nk, n; + PyObject *func, *x, *method_self = NULL; + + if (!is_procedure) + regres = NEXTREG_PTR(p_next_instr); + regfunc = NEXTREG_PTR(p_next_instr); + /* FIXME: nreg: + extendedarg */ + nreg = NEXTARG_PTR(p_next_instr); + func = GETLOCAL(regfunc); + + na = nreg & 0xff; + nk = (nreg >>8) & 0xff; + n = na + 2 * nk; + + /* Always dispatch PyCFunction first, because these are + presumed to be the most frequent callable object. + */ + if (PyCFunction_Check(func) && nk == 0) { + int flags = PyCFunction_GET_FLAGS(func); + PyThreadState *tstate = PyThreadState_GET(); + + PCALL(PCALL_CFUNCTION); + if (flags & (METH_NOARGS | METH_O)) { + PyCFunction meth = PyCFunction_GET_FUNCTION(func); + PyObject *self = PyCFunction_GET_SELF(func); + if (flags & METH_NOARGS && na == 0) { + C_TRACE(x, (*meth)(self,NULL)); + } + else if (flags & METH_O && na == 1) { + int regarg0 = NEXTREG_PTR(p_next_instr); + PyObject *arg = GETLOCAL(regarg0); + C_TRACE(x, (*meth)(self,arg)); + } + else { + err_args(func, flags, na); + x = NULL; + } + } + else { + PyObject *callargs; + callargs = load_args_reg(na, fastlocals, p_next_instr); + READ_TIMESTAMP(*pintr0); + C_TRACE(x, PyCFunction_Call(func,callargs,NULL)); + READ_TIMESTAMP(*pintr1); + Py_XDECREF(callargs); + } + } else { + if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) { + /* optimize access to bound methods */ + PyObject *self = PyMethod_GET_SELF(func); + method_self = self; + Py_INCREF(method_self); + PCALL(PCALL_METHOD); + PCALL(PCALL_BOUND_METHOD); + func = PyMethod_GET_FUNCTION(func); + Py_INCREF(func); + na++; + n++; + } else + Py_INCREF(func); + READ_TIMESTAMP(*pintr0); + if (PyFunction_Check(func)) + x = fast_function_reg(method_self, func, fastlocals, p_next_instr, n, na, nk); + else + x = do_call_reg(func, fastlocals, p_next_instr, na, nk); + if (method_self != NULL) + Py_DECREF(method_self); + READ_TIMESTAMP(*pintr1); + Py_DECREF(func); + } + + /* Clear the stack of the function object. Also removes + the arguments in case they weren't consumed already + (fast_function() and err_args() leave them on the stack). + */ + if (x != NULL) { + if (!is_procedure) + SETLOCAL(regres, x); + else + Py_DECREF(x); + } + + if (x == NULL) + return -1; + return 0; +} + /* The fast_function() function optimize calls for which no argument tuple is necessary; the objects are passed directly from the stack. For the simplest case -- a function that takes only positional @@ -4248,6 +5645,103 @@ } static PyObject * +fast_function_reg(PyObject *method_self, PyObject *func, PyObject **fastlocals, unsigned char **p_next_instr, int n, int na, int nk) +{ + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + PyObject *kwdefs = PyFunction_GET_KW_DEFAULTS(func); + PyObject **d = NULL; + PyObject **args; + PyObject **kws; + PyObject *res; + int i; + int nd = 0; + int reg; + PyObject *value; + + PCALL(PCALL_FUNCTION); + PCALL(PCALL_FAST_FUNCTION); + if (argdefs == NULL && co->co_argcount == n && + co->co_kwonlyargcount == 0 && nk==0 && + co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { + PyFrameObject *f; + PyObject *retval = NULL; + PyThreadState *tstate = PyThreadState_GET(); + PyObject **f_fastlocals; + + PCALL(PCALL_FASTER_FUNCTION); + assert(globals != NULL); + /* XXX Perhaps we should create a specialized + PyFrame_New() that doesn't take locals, but does + take builtins without sanity checking them. + */ + assert(tstate != NULL); + f = PyFrame_New(tstate, co, globals, NULL); + if (f == NULL) + return NULL; + + f_fastlocals = f->f_localsplus; + + for (i = 0; i < n; i++) { + if (i == 0 && method_self != NULL) { + value = method_self; + } + else { + reg = NEXTREG_PTR(p_next_instr); + value = GETLOCAL(reg); + } + Py_INCREF(value); + f_fastlocals[i] = value; + } + retval = PyEval_EvalFrameEx(f,0); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return retval; + } + + /* FIXME: don't use alloca() if na > 10 */ + args = alloca(sizeof(PyObject*) * na); + for (i=0; i < na; i++) { + if (i == 0 && method_self != NULL) { + value = method_self; + } + else { + reg = NEXTREG_PTR(p_next_instr); + value = GETLOCAL(reg); + } + /* Py_INCREF(value); */ + args[i] = value; + } + /* FIXME: don't use alloca() if nk > 5 */ + kws = alloca(sizeof(PyObject*) * nk * 2); + for (i=0; i < nk; i++) { + reg = NEXTREG_PTR(p_next_instr); + value = GETLOCAL(reg); + /* Py_INCREF(key); */ + kws[2*i] = value; + + reg = NEXTREG_PTR(p_next_instr); + value = GETLOCAL(reg); + /* Py_INCREF(value); */ + kws[2*i+1] = value; + } + if (argdefs != NULL) { + d = &PyTuple_GET_ITEM(argdefs, 0); + nd = Py_SIZE(argdefs); + } + + res = PyEval_EvalCodeEx((PyObject*)co, globals, + (PyObject *)NULL, + args, na, + kws, nk, + d, nd, kwdefs, + PyFunction_GET_CLOSURE(func)); + return res; +} + +static PyObject * update_keyword_args(PyObject *orig_kwdict, int nk, PyObject ***pp_stack, PyObject *func) { @@ -4288,6 +5782,47 @@ } static PyObject * +update_keyword_args_reg(PyObject *orig_kwdict, int nk, + PyObject **fastlocals, unsigned char **p_next_instr, + PyObject *func) +{ + PyObject *kwdict = NULL; + if (orig_kwdict == NULL) + kwdict = PyDict_New(); + else { + kwdict = PyDict_Copy(orig_kwdict); + Py_DECREF(orig_kwdict); + } + if (kwdict == NULL) + return NULL; + while (--nk >= 0) { + int err; + PyObject *value, *key; + int reg; + reg = NEXTREG_PTR(p_next_instr); + key = GETLOCAL(reg); + reg = NEXTREG_PTR(p_next_instr); + value = GETLOCAL(reg); + if (PyDict_GetItem(kwdict, key) != NULL) { + PyErr_Format(PyExc_TypeError, + "%.200s%s got multiple values " + "for keyword argument '%U'", + PyEval_GetFuncName(func), + PyEval_GetFuncDesc(func), + key); + Py_DECREF(kwdict); + return NULL; + } + err = PyDict_SetItem(kwdict, key, value); + if (err) { + Py_DECREF(kwdict); + return NULL; + } + } + return kwdict; +} + +static PyObject * update_star_args(int nstack, int nstar, PyObject *stararg, PyObject ***pp_stack) { @@ -4328,6 +5863,25 @@ } static PyObject * +load_args_reg(int na, PyObject **fastlocals, unsigned char **p_next_instr) +{ + PyObject *args = PyTuple_New(na); + PyObject *w; + int reg; + int i; + + if (args == NULL) + return NULL; + for (i=0; i < na; i++) { + reg = NEXTREG_PTR(p_next_instr); + w = GETLOCAL(reg); + Py_INCREF(w); + PyTuple_SET_ITEM(args, i, w); + } + return args; +} + +static PyObject * do_call(PyObject *func, PyObject ***pp_stack, int na, int nk) { PyObject *callargs = NULL; @@ -4371,6 +5925,49 @@ } static PyObject * +do_call_reg(PyObject *func, PyObject **fastlocals, unsigned char **p_next_instr, int na, int nk) +{ + PyObject *callargs = NULL; + PyObject *kwdict = NULL; + PyObject *result = NULL; + + callargs = load_args_reg(na, fastlocals, p_next_instr); + if (callargs == NULL) + goto call_fail; + if (nk > 0) { + kwdict = update_keyword_args_reg(NULL, nk, fastlocals, p_next_instr, func); + if (kwdict == NULL) + goto call_fail; + } +#ifdef CALL_PROFILE + /* At this point, we have to look at the type of func to + update the call stats properly. Do it here so as to avoid + exposing the call stats machinery outside ceval.c + */ + if (PyFunction_Check(func)) + PCALL(PCALL_FUNCTION); + else if (PyMethod_Check(func)) + PCALL(PCALL_METHOD); + else if (PyType_Check(func)) + PCALL(PCALL_TYPE); + else if (PyCFunction_Check(func)) + PCALL(PCALL_CFUNCTION); + else + PCALL(PCALL_OTHER); +#endif + if (PyCFunction_Check(func)) { + PyThreadState *tstate = PyThreadState_GET(); + C_TRACE(result, PyCFunction_Call(func, callargs, kwdict)); + } + else + result = PyObject_Call(func, callargs, kwdict); +call_fail: + Py_XDECREF(callargs); + Py_XDECREF(kwdict); + return result; +} + +static PyObject * ext_do_call(PyObject *func, PyObject ***pp_stack, int flags, int na, int nk) { int nstar = 0; diff -r fb80df16c4ff -r 6a9ca0177020 Python/compile.c --- a/Python/compile.c Tue Oct 23 21:14:34 2012 +0300 +++ b/Python/compile.c Tue May 20 11:04:02 2014 +0200 @@ -31,6 +31,7 @@ #include "opcode.h" int Py_OptimizeFlag = 0; +PyObject* Py_CodeOptimizer = NULL; #define DEFAULT_BLOCK_SIZE 16 #define DEFAULT_BLOCKS 8 @@ -4141,6 +4142,11 @@ c->c_filename_obj, c->u->u_name, c->u->u_firstlineno, a->a_lnotab); + if (co != NULL && Py_CodeOptimizer) { + PyObject *newcode = PyObject_CallFunction(Py_CodeOptimizer, "O", co); + Py_DECREF(co); + co = newcode; + } error: Py_XDECREF(consts); Py_XDECREF(names); diff -r fb80df16c4ff -r 6a9ca0177020 Python/opcode_targets.h --- a/Python/opcode_targets.h Tue Oct 23 21:14:34 2012 +0300 +++ b/Python/opcode_targets.h Tue May 20 11:04:02 2014 +0200 @@ -147,65 +147,65 @@ &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, &&TARGET_MAP_ADD, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_UNARY_NOT_REG, &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_CONST_REG, + &&TARGET_RETURN_VALUE_REG, + &&TARGET_BINARY_ADD_REG, + &&TARGET_POP_REG, + &&TARGET_MOVE_REG, + &&TARGET_PUSH_REG, + &&TARGET_BINARY_MULTIPLY_REG, + &&TARGET_LOAD_GLOBAL_REG, + &&TARGET_BINARY_SUBTRACT_REG, + &&TARGET_FOR_ITER_REG, + &&TARGET_COMPARE_REG, + &&TARGET_JUMP_IF_FALSE_REG, + &&TARGET_CALL_FUNCTION_REG, + &&TARGET_GET_ITER_REG, + &&TARGET_BINARY_SUBSCR_REG, + &&TARGET_LOAD_ATTR_REG, + &&TARGET_BINARY_FLOOR_DIVIDE_REG, + &&TARGET_JUMP_IF_TRUE_REG, + &&TARGET_UNARY_NEGATIVE_REG, + &&TARGET_BUILD_SLICE2_REG, + &&TARGET_BUILD_LIST_REG, + &&TARGET_UNPACK_SEQUENCE_REG, + &&TARGET_BUILD_TUPLE_REG, + &&TARGET_INPLACE_ADD_REG, + &&TARGET_INPLACE_MULTIPLY_REG, + &&TARGET_INPLACE_SUBTRACT_REG, + &&TARGET_INPLACE_FLOOR_DIVIDE_REG, + &&TARGET_STORE_GLOBAL_REG, + &&TARGET_STORE_ATTR_REG, + &&TARGET_BUILD_MAP_REG, + &&TARGET_STORE_MAP_REG, + &&TARGET_MAKE_FUNCTION_REG, + &&TARGET_LOAD_BUILD_CLASS_REG, + &&TARGET_CALL_PROCEDURE_REG, + &&TARGET_CLEAR_REG, + &&TARGET_MAKE_CLOSURE_REG, + &&TARGET_LOAD_CLOSURE_REG, + &&TARGET_STORE_DEREF_REG, + &&TARGET_DELETE_SUBSCR_REG, + &&TARGET_LOAD_DEREF_REG, + &&TARGET_BINARY_MODULO_REG, + &&TARGET_STORE_SUBSCR_REG, + &&TARGET_BINARY_TRUE_DIVIDE_REG, + &&TARGET_INPLACE_TRUE_DIVIDE_REG, + &&TARGET_INPLACE_MODULO_REG, + &&TARGET_LOAD_NAME_REG, + &&TARGET_LIST_APPEND_REG, + &&TARGET_STORE_NAME_REG, + &&TARGET_IMPORT_NAME_REG, + &&TARGET_IMPORT_FROM_REG, + &&TARGET_IMPORT_STAR_REG, + &&TARGET_YIELD_REG, + &&TARGET_STORE_LOCALS_REG, + &&TARGET_UNARY_POSITIVE_REG, + &&TARGET_MAP_ADD_REG, + &&TARGET_BUILD_SLICE3_REG, + &&TARGET_UNPACK_EX_REG, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, diff -r fb80df16c4ff -r 6a9ca0177020 Python/sysmodule.c --- a/Python/sysmodule.c Tue Oct 23 21:14:34 2012 +0300 +++ b/Python/sysmodule.c Tue May 20 11:04:02 2014 +0200 @@ -411,6 +411,29 @@ return 0; } +extern PyObject* Py_CodeOptimizer; + +static PyObject * +sys_setcodeoptimizer(PyObject *self, PyObject *func) +{ + if (func == Py_None) { + Py_CLEAR(Py_CodeOptimizer); + } + else { + Py_XDECREF(Py_CodeOptimizer); + Py_INCREF(func); + Py_CodeOptimizer = func; + } + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(setcodeoptimizer_doc, +"setcodeoptimizer(function)\n\ +\n\ +Optimize a newly created code object. The function returns a new code object." +); + static PyObject * sys_settrace(PyObject *self, PyObject *args) { @@ -1113,6 +1136,7 @@ #endif {"settrace", sys_settrace, METH_O, settrace_doc}, {"gettrace", sys_gettrace, METH_NOARGS, gettrace_doc}, + {"setcodeoptimizer", sys_setcodeoptimizer, METH_O, setcodeoptimizer_doc}, {"call_tracing", sys_call_tracing, METH_VARARGS, call_tracing_doc}, {"_debugmallocstats", sys_debugmallocstats, METH_VARARGS, debugmallocstats_doc}, diff -r fb80df16c4ff -r 6a9ca0177020 REGISTERVM.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/REGISTERVM.txt Tue May 20 11:04:02 2014 +0200 @@ -0,0 +1,319 @@ ++++++++++++++++++++++++++++++++++++++++++ +Register-based Virtual Machine for Python ++++++++++++++++++++++++++++++++++++++++++ + +Status +====== + + * Most instructions using the stack are converted to instructions using + registers + * Bytecode using registers with all optimizations enable is usually 10% faster + than bytecode using the stack, according to pybench + * registervm generates invalid code, see TODO section below, so it's not + possible yet to use it on the Python test suite + + +TODO +==== + +Bugs +---- + + * Register allocator doesn't handle correctly conditional branches: CLEAR_REG + is removed on the wrong branch in test_move_instr. + * Fail to track the stack state in if/else. Bug hidden by the register + allocator in the following example: + + def func(obj): + obj.attr = sys.modules['warnings'] if module is None else module + + * Don't move globals out of if. Only out of loops? subprocess.py: + + if mswindows: + if p2cwrite != -1: + p2cwrite = msvcrt.open_osfhandle(p2cwrite.Detach(), 0) + + But do move len() out of loop for: + + def loop_move_instr(): + length = 0 + for i in range(5): + length += len("abc") - 1 + return length + + * Don't remove duplicate LOAD_GLOBAL in + "LOAD_GLOBAL ...; CALL_PROCEDURE ...; LOAD_GLOBAL ...": + CALL_PROCEDURE has border effect + + * Don't remove duplicate LOAD_NAME if a function has a border effect:: + + x=1 + def modify(): + global x + x = 2 + print(x) + modify() + print(x) + +Improvments +----------- + + * Move LOAD_CONST out of loops: it was done in a previous version, but the + optimization was broken by the introduction of CLEAR_REG + * Copy constants to the frame objects so constants can be used as registers + and LOAD_CONST instructions can be simplify removed + * Enable move_load_const by default? + * Fix moving LOAD_ATTR_REG: only do that when calling methods. + See test_sieve() of test_registervm: primes.append(). + + result = Result() + while 1: + if result.done: + break + func(result) + + * Reenable merging duplicate LOAD_ATTR + * Register allocation for locale_alias = {...} is very very slow + * "while 1: ... return" generates useless SETUP_LOOP + * Reuse locals? + * implement register version of the following instructions: + + - DELETE_ATTR + - try/finally + - yield from + - CALL_FUNCTION_VAR_KW + - CALL_FUNCTION_VAR + - operators: a | b, a & b, a ^ b, a |= b, a &= b, a ^= b + + * DEREF: + + - add a test using free variables + - Move LOAD_DEREF_REG out of loops + + * NAME: + + - test_list_append() of test_registervm.py + - Move LOAD_NAME_REG out of loop + + * Handle JUMP_IF_TRUE_OR_POP: see test_getline() of test_registervm + * Compute the number of used registers in a frame + * Write a new test per configuration option + * Factorize code processing arg_types, ex: disassmblers of dis and registervm modules + * Add tests on class methods + * Fix lnotab + + +Changelog +========= + +2012-12-21 + + * Use RegisterTracker to merge duplicated LOAD, STORE_GLOBAL/LOAD_GLOBAL + are now also simplified + +2012-12-19 + + * Emit POP_REG to simplify the stack tracker + +2012-12-18 + + * LOAD are now only moved out of loops + +2012-12-14 + + * Duplicated LOAD instructions can be merged without moving them + * Rewrite the stack tracker: PUSH_REG don't need to be moved anymore + * Fix JUMP_IF_TRUE_OR_POP/JUMP_IF_FALSE_OR_POP to not generate invalid code + * Don't move LOAD_ATTR_REG out of try/except block + +2012-12-11 + + * Split instructions into linked-blocks + +2012-11-26 + + * Add a stack tracker + +2012-11-20 + + * Remove useless jumps + * CALL_FUNCTION_REG and CALL_PROCEDURE_REG are fully implemented + +2012-10-29 + + * Remove "if (HAS_ARG(op))" check in PyEval_EvalFrameEx() + +2012-10-27 + + * Duplicated LOAD_CONST and LOAD_GLOBAL are merged (optimization disabled on + LOAD_GLOBAL because it is buggy) + +2012-10-23 + + * initial commit, 0f7f49b7083c + + + +CPython 3.3 bytecode is inefficient +=================================== + + * Useless jump: JUMP_ABSOLUTE + * Generate dead code: RETURN_VALUE; RETURN_VALUE (the second instruction is unreachable) + * Duplicate constants: see TupleSlicing of pybench + * Constant folding: see astoptimizer project + * STORE_NAME 'f'; LOAD_NAME 'f' + * STORE_GLOBAL 'x'; LOAD_GLOBAL 'x' + + +Rationale +========= + +The performance of the loop evaluating bytecode is critical in Python. For +Python example, using computed-goto instead of switch to dispatch bytecode +improved performances by 20%. Related issues: + + * `use computed goto's in ceval loop `_ + * `Faster opcode dispatch on gcc `_ + * `Computed-goto patch for RE engine `_ + +Using registers of a stack reduce the number of operations, but increase the +size of the code. I expect an significant speedup when all operations will use +registers. + + +Optimizations +============= + +Optimizations: + + * Remove useless LOAD_NAME and LOAD_GLOBAL. + For example: "STORE_NAME var; LOAD_NAME var" + * Merge duplicate loads (LOAD_CONST, LOAD_GLOBAL_REG, LOAD_ATTR). + For example, "lst.append(1); lst.append(1)" only gets constant "1" and the + "lst.append" attribute once. + +Misc: + + * Automatically detect inplace operations. For example, "x = x + y" is + compiled to "BINARY_ADD_REG 'x', 'x', 'y'" which calls + PyNumber_InPlaceAdd(), instead of PyNumber_Add(). + * Move constant, global and attribute loads out of loops (to the beginning) + * Remove useless jumps (ex: JUMP_FORWARD ) + + +Algorithm +========= + +The current implementation rewrites the stack-based operations to use +register-based operations instead. For example, "LOAD_GLOBAL range" is replaced +with "LOAD_GLOBAL_REG R0, range; PUSH_REG R0". This first step is inefficient +because it increases the number of operations. + +Then, operations are reordered: PUSH_REG and POP_REG to the end. So we can +replace "PUSH_REG R0; PUSH_REG R1; STACK_OPERATION; POP_REG R2" with a single +operatiton: "REGISTER_OPERATION R2, R0, R1". + +Move invariant out of the loop: it is possible to move constants out of the loop. +For example, LOAD_CONST_REG are moved to the beginning. We might also move +LOAD_GLOBAL_REG and LOAD_ATTR_REG to the beginning. + +Later, a new AST to bytecote compiler can be implemented to emit directly +operations using registers. + + +Example +======= + +Simple function computing the factorial of n:: + + def fact_iter(n): + f = 1 + for i in range(2, n+1): + f *= i + return f + +Stack-based bytecode (20 instructions):: + + 0 LOAD_CONST 1 (const#1) + 3 STORE_FAST 'f' + 6 SETUP_LOOP + 9 LOAD_GLOBAL 0 (range) + 12 LOAD_CONST 2 (const#2) + 15 LOAD_FAST 'n' + 18 LOAD_CONST 1 (const#1) + 21 BINARY_ADD + 22 CALL_FUNCTION 2 (2 positional, 0 keyword pair) + 25 GET_ITER + >> 26 FOR_ITER + 29 STORE_FAST 'i' + 32 LOAD_FAST 'f' + 35 LOAD_FAST 'i' + 38 INPLACE_MULTIPLY + 39 STORE_FAST 'f' + 42 JUMP_ABSOLUTE + >> 45 POP_BLOCK + >> 46 LOAD_FAST 'f' + 49 RETURN_VALUE + +Register-based bytecode (13 instructions):: + + + 0 LOAD_CONST_REG 'f', 1 (const#1) + 5 LOAD_CONST_REG R0, 2 (const#2) + 10 LOAD_GLOBAL_REG R1, 'range' (name#0) + 15 SETUP_LOOP + 18 BINARY_ADD_REG R2, 'n', 'f' + 25 CALL_FUNCTION_REG 4, R1, R1, R0, R2 + 36 GET_ITER_REG R1, R1 + >> 41 FOR_ITER_REG 'i', R1, + 48 INPLACE_MULTIPLY_REG 'f', 'i' + 53 JUMP_ABSOLUTE + >> 56 POP_BLOCK + >> 57 RETURN_VALUE_REG 'f' + +The body of the main loop of this function is composed of 1 instructions +instead of 5. + + +Comparative table +================= + +Example |S|r|R| Stack | Register +------------+-+-+-+----------------------------------+---------------------------------------------------- +append(2) |4|1|2| LOAD_FAST 'append' | LOAD_CONST_REG R1, 2 (const#2) + | | | | LOAD_CONST 2 (const#2) | ... + | | | | CALL_FUNCTION (1 positional) | ... + | | | | POP_TOP | CALL_PROCEDURE_REG 'append', (1 positional), R1 +------------+-+-+-+----------------------------------+---------------------------------------------------- +l[0] = 3 |4|1|2| LOAD_CONST 3 (const#1) | LOAD_CONST_REG R0, 3 (const#1) + | | | | LOAD_FAST 'l' | LOAD_CONST_REG R3, 0 (const#4) + | | | | LOAD_CONST 0 (const#4) | ... + | | | | STORE_SUBSCR | STORE_SUBSCR_REG 'l', R3, R0 +------------+-+-+-+----------------------------------+---------------------------------------------------- +x = l[0] |4|1|2| LOAD_FAST 'l' | LOAD_CONST_REG R3, 0 (const#4) + | | | | LOAD_CONST 0 (const#4) | ... + | | | | BINARY_SUBSCR | ... + | | | | STORE_FAST 'x' | BINARY_SUBSCR_REG 'x', 'l', R3 +------------+-+-+-+----------------------------------+---------------------------------------------------- +s.isalnum() |4|1|2| LOAD_FAST 's' | LOAD_ATTR_REG R5, 's', 'isalnum' (name#3) + | | | | LOAD_ATTR 'isalnum' (name#3) | ... + | | | | CALL_FUNCTION (0 positional) | ... + | | | | POP_TOP | CALL_PROCEDURE_REG R5, (0 positional) +------------+-+-+-+----------------------------------+---------------------------------------------------- +o.a = 2 |3|1|2| LOAD_CONST 2 (const#3) | LOAD_CONST_REG R2, 2 (const#3) + | | | | LOAD_FAST 'o' | ... + | | | | STORE_ATTR 'a' (name#2) | STORE_ATTR_REG 'o', 'a' (name#2), R2 +------------+-+-+-+----------------------------------+---------------------------------------------------- +x = o.a |3|1|1| LOAD_FAST 'o' | LOAD_ATTR_REG 'x', 'o', 'a' (name#2) + | | | | LOAD_ATTR 'a' (name#2) | + | | | | STORE_FAST 'x' | +------------+-+-+-+----------------------------------+---------------------------------------------------- + +Columns: + + * "S": Number of stack-based instructions + * "r": Number of stack-based instructions exclusing instructions moved + out of loops (ex: LOAD_CONST_REG) + * "R": Total number of stack-based instructions (including instructions moved + out of loops) + diff -r fb80df16c4ff -r 6a9ca0177020 Tools/pybench/CommandLine.py --- a/Tools/pybench/CommandLine.py Tue Oct 23 21:14:34 2012 +0300 +++ b/Tools/pybench/CommandLine.py Tue May 20 11:04:02 2014 +0200 @@ -358,6 +358,7 @@ print() print('* User Break') print() + raise rc = 1 except self.InternalError: diff -r fb80df16c4ff -r 6a9ca0177020 Tools/pybench/pybench.py --- a/Tools/pybench/pybench.py Tue Oct 23 21:14:34 2012 +0300 +++ b/Tools/pybench/pybench.py Tue May 20 11:04:02 2014 +0200 @@ -1,4 +1,11 @@ #!/usr/local/bin/python -O +from __future__ import print_function +REGISTERVM_HACK = True +if REGISTERVM_HACK: + import registervm + import types + REGISTERVM_CONFIG = registervm.Config() + REGISTERVM_CONFIG.enable_unsafe_optimizations() """ A Python Benchmark Suite @@ -10,7 +17,6 @@ # module Setup.py. # -from __future__ import print_function # pybench Copyright __copyright__ = """\ @@ -38,6 +44,9 @@ import sys import time import platform +if REGISTERVM_HACK: + import registervm + import dis from CommandLine import * try: @@ -95,6 +104,25 @@ ### Helpers +def optimize(what, obj, func): + if not REGISTERVM_HACK: + return + if isinstance(func, types.MethodType): + func = func.__func__ + print("%s %s..." % (what, obj.__class__.__name__)) + try: + converter = registervm.Converter(func.__code__, REGISTERVM_CONFIG) + converter.convert() + func.__code__ = converter.compile() + except Exception as err: + print("FAILED TO OPTIMIZE CALIBRATE %s: %s" % (obj.__class__.__name__, err)) + if str(err) != 'too much registers!': + raise + else: + converter.check_inefficient_code() + #code.dump() + #dis.dis(self.calibrate) + def get_timer(timertype): if timertype == TIMER_TIME_TIME: @@ -231,7 +259,8 @@ if warp is not None: self.rounds = int(self.rounds / warp) if self.rounds == 0: - raise ValueError('warp factor set too high') + self.rounds = 1 + #raise ValueError('warp factor set too high') self.warp = warp if calibration_runs is not None: if (not ALLOW_SKIPPING_CALIBRATION and @@ -277,6 +306,8 @@ return calibrate = self.calibrate + optimize("CALIBRATE", self, self.calibrate) + timer = self.get_timer() calibration_loops = range(CALIBRATION_LOOPS) @@ -325,18 +356,22 @@ """ test = self.test + optimize("RUN TEST", self, self.test) + timer = self.get_timer() # Get calibration min_overhead = min(self.overhead_times) + # FIXME + min_overhead = 0 # Test run t = timer() test() t = timer() - t - if t < MIN_TEST_RUNTIME: - raise ValueError('warp factor too high: ' - 'test times are < 10ms') + #if t < MIN_TEST_RUNTIME: + # raise ValueError('warp factor too high: ' + # 'test times are < 10ms') eff_time = t - min_overhead if eff_time < 0: raise ValueError('wrong calibration') @@ -945,6 +980,7 @@ print() print('*** KeyboardInterrupt -- Aborting') print() + raise return bench.print_header() if compare_to: diff -r fb80df16c4ff -r 6a9ca0177020 demo.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/demo.py Tue May 20 11:04:02 2014 +0200 @@ -0,0 +1,158 @@ +import dis +import functools +import registervm +import sys +import time +import tokenize +from optparse import OptionParser + +BENCH = False + +def exec_code(code): + # specify a namespace, or import will not work + namespace = {} + exec(code, namespace, namespace) + +def benchmark(what, code): + print("Benchmark %s: prepare" % what) + for prepare in range(3): + exec_code(code) + best = None + runs = 5 + loops = 100 + for run in range(runs): + print("Benchmark %s: run %s/%s" % (what, 1+run, runs)) + t0 = time.perf_counter() + for loop in range(loops): + exec_code(code) + dt = time.perf_counter() - t0 + if best is not None: + best = min(best, dt) + else: + best = dt + print("Best time (%s runs, %s loops) of %s: %.1f ms per loop" + % (runs, loops, what, best * 1e3 / loops)) + +def parse_options(): + parser = OptionParser(usage="%prog [options] -f script.py|COMMAND [COMMAND2 COMMAND3 ...]") + parser.add_option("-2", "--twice", + help="optimize twice", + action="store_true", default=False) + parser.add_option("-f", "--file", + type="str", + action="store", default=None) + parser.add_option("-a", "--assembler", + help="show assembler", + action="store_true", default=False) + parser.add_option("-v", "--verbose", + help="verbose mode", + action="store_true", default=False) + parser.add_option("-Z", "--dump-optimized", + help="Don't dump optimized bytecode/blocks", + action="store_false", default=True) + parser.add_option("-O", "--dump-original", + help="Dump original bytecode/blocks", + action="store_true", default=False) + parser.add_option("-C", "--dump-bytecode", + help="Dump bytecode", + action="store_true", default=False) + parser.add_option("-B", "--dump-blocks", + help="Dump blocks", + action="store_true", default=False) + parser.add_option("-q", "--quiet", + help="Quiet mode", + action="store_true", default=False) + parser.add_option("-d", "--debug", + help="Disable debug mode", + action="store_false", default=True) + parser.add_option("-b", "--benchmark", + help="Run benchmark", + action="store_true", default=False) + parser.add_option("-m", "--hide-main", + help="Hide code of the __main__ module", + action="store_true", default=False) + options, args = parser.parse_args() + if options.quiet: + options.debug = False + command = None + if len(args) > 1: + command = '\n'.join(args) + if options.file: + parser.print_help() + sys.exit(1) + elif not options.file: + parser.print_help() + sys.exit(1) + return options, command + +def main(): + options, code_str = parse_options() + filename = options.file + if filename: + with tokenize.open(filename) as f: + code_str = f.read() + else: + filename = "" + + if options.benchmark: + stack_code = compile(code_str, filename, "exec") + else: + stack_code = None + + config = registervm.Config() + config.quiet = options.quiet + config.debug = options.debug + config.enable_unsafe_optimizations() + config.enable_buggy_optimizations() + def create_code_optimizer(config, options): + if options.twice: + nloops = 2 + else: + nloops = 1 + def code_optimizer(code): + for loop in range(nloops): + if not options.hide_main or code.co_name != '': + kw = { + 'dump_original': options.dump_original, + 'dump_optimized': options.dump_optimized, + 'dump_bytecode': options.dump_bytecode, + 'dump_blocks': options.dump_blocks, + } + else: + kw = {} + code = registervm.optimize_code( + code, config, + **kw) + return code + return code_optimizer + + + optimizer = create_code_optimizer(config, options) + if options.verbose: + print("Set code optimizer") + sys.setcodeoptimizer(optimizer) + + if options.verbose: + print("Compile") + code = compile(code_str, filename, "exec") + + if options.verbose: + print("Execute") + exec_code(code) + + if options.verbose: + print() + + if options.assembler: + if stack_code is not None: + print("Assembler (stack)") + dis.dis(stack_code) + + print("Assembler (register)") + dis.dis(code) + + if options.benchmark: + benchmark("stack", stack_code) + benchmark("register", code) + +main() diff -r fb80df16c4ff -r 6a9ca0177020 use_regs.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/use_regs.py Tue May 20 11:04:02 2014 +0200 @@ -0,0 +1,304 @@ +""" +Benchmark results on Linux-3.4.9-2.fc16.x86_64-x86_64-with-fedora-16-Verne +with Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz. + +Bench fact +Stack: 54.7 ms +Register: 54.9 ms (+0.4%) +Bench fact_loop +Stack: 77.0 ms +Register: 76.8 ms (-0.2%) +Bench pow2 +Stack: 85.9 ms +Register: 81.1 ms (-5.5%) +Bench loop_sum +Stack: 27.0 ms +Register: 24.1 ms (-11.0%) +Bench loop_noop +Stack: 105.1 ms +Register: 102.5 ms (-2.4%) +Bench sieve +Stack: 45.5 ms +Register: 45.6 ms (+0.3%) +Bench bisect +Stack: 40.1 ms +Register: 36.5 ms (-8.9%) +Bench qsort2 +Stack: 158.5 ms +Register: 156.1 ms (-1.5%) +""" +from registervm import Converter, optimize_func, Config, LABEL +import dis +import gc +import sys +import time +import types + +CHECK_REFLEAKS = True + +class RewriteBytecode: + def __init__(self, func, config=None): + if isinstance(func, types.MethodType): + func = func.__func__ + self.func = func + self.code = Converter(func.__code__, config) + self.name = func.__name__ + + def dump(self): + #self.code.dump() + dis.dis(self.func) + ninstr = 0 + for instr in self.code.instructions: + if instr.name is not LABEL: + ninstr += 1 + print("-> %s instructions" % ninstr) + + def patch(self, dump_bytecode=True, dump_blocks=True): + if dump_bytecode: + self.dump() + print("=== %s ===>" % self.name) + self.code.convert(dump_blocks=dump_blocks) + if 0: + self.dump() + print("=== %s ===>" % self.name) + self.func.__code__ = self.code.compile() + if dump_bytecode: + self.dump() + self.code.check_inefficient_code() + print("=== %s ===>" % self.name) + +def use_register_bytecodes(config, func, *args): + if 1: + RewriteBytecode(func, config).patch() + else: + dis.dis(func) + print("====>") + optimize_func(func, config) + dis.dis(func) + print("====>") + print(func(*args)) + + if CHECK_REFLEAKS and hasattr(sys, "gettotalrefcount"): + for prepare in range(10): + func(*args) + gc.collect() + + before = None + leak = None + repeat = 10 + before = sys.gettotalrefcount() + for index in range(repeat): + func(*args) + gc.collect() + del index + gc.collect() + leak = sys.gettotalrefcount() - before + if leak: + raise Exception("reference leak: %s!" % leak) + print("(no reference leak)") + +def unary_not(x): + x = not x + return x + +def subx(): + x = 1 + x = x - x + return x + +def inplace_add(a, b, c): + return a + b + c + +def loop_sum(n): + total = 0 + for i in range(n): + total += i + return total + +def loop_noop(n): + for i in range(n): + pass + +def noop(): + return + +def load_const(): + total = 42 + return total + +def fact(n): + if n < 2: + return 1 + else: + return n * fact(n-1) + +def fact_loop(n): + n += 1 + f = 1 + for i in range(2, n+1): + f *= i + return f + +def pow2(n): + x = 1 + for i in range(n): + x *= 2 + return x + +def call_func(text): + x = len(text) + return x + +def noop(): + pass + +def _bench(func, args, repeat, loops): + best = None + for run in range(repeat): + before = time.perf_counter() + for loop in range(loops): + func(*args) + dt = (time.perf_counter() - before) + if best is not None: + best = min(best, dt) + else: + best = dt + return best + +def bench(config, func, *args, **kw): + repeat = kw.get('repeat', 50) + loops = kw.get('loops', 5) + print("Bench %s" % func.__name__) + best_empty = _bench(noop, (), repeat, loops) + a = _bench(func, args, repeat, loops) + optimize_func(func, config) + if func is qsort2: + optimize_func(partition, config) + b = _bench(func, args, repeat, loops) + a -= best_empty + b -= best_empty + print("Stack: %.1f ms" % (a*1e3)) + k = (b-a)*100./a + print("Register: %.1f ms (%+.1f%%)" % (b*1e3, k)) + +def sieve(max): + primes = list(range(2,max+1)) + for i in primes: + j = 2 + while i * j <= primes[-1]: + if i * j in primes: + primes.remove(i*j) + j += 1 + return primes + +def arccot(x, unity): + sum = xpower = unity // x + n = 3 + sign = -1 + while 1: + xpower = xpower // (x*x) + term = xpower // n + if not term: + break + sum += sign * term + sign = -sign + n += 2 + return sum + +def pi(digits): + unity = 10**(digits + 10) + pi = 4 * (4*arccot(5, unity) - arccot(239, unity)) + return pi // 10**10 + +def bisect(a, x): + lo = 0 + hi = len(a) + while lo < hi: + mid = (lo+hi)//2 + if x < a[mid]: + hi = mid + else: + lo = mid+1 + return lo + +def partition(sequence, l, e, g): + if not sequence: + return (l, e, g) + else: + head = sequence[0] + if head < e[0]: + return partition(sequence[1:], l + [head], e, g) + elif head > e[0]: + return partition(sequence[1:], l, e, g + [head]) + else: + return partition(sequence[1:], l, e + [head], g) + +def qsort2(sequence): + """ + Quicksort using a partitioning function + >>> qsort2<> + <> + >>> qsort2<> + <> + """ + if not sequence: + return [] + else: + pivot = sequence[0] + lesser, equal, greater = partition(sequence[1:], [], [pivot], []) + return qsort2(lesser) + equal + qsort2(greater) + +def build_list(x, y, z): + return x + y + z + +def build_list(x, y, z): + print(x, y, z) + +def yield_value(): + x = 3 + yield x + +def test_func(x): + if not x: + return 1 + else: + return x+1 + +# enable all optimizations! +config = Config() +config.quiet = False +config.enable_unsafe_optimizations() + +#use_register_bytecodes(config, unary_not, True) +#use_register_bytecodes(config, subx) +#use_register_bytecodes(config, loop_sum, 10) +#use_register_bytecodes(config, load_const) +#use_register_bytecodes(config, noop) +#use_register_bytecodes(config, loop_noop, 3) +#use_register_bytecodes(config, fact, 10) +#use_register_bytecodes(config, fact_loop, 10) +#use_register_bytecodes(config, pow2, 32) +#use_register_bytecodes(config, sieve, 100) +#use_register_bytecodes(config, arccot, 5, 10**(10 + 10)) +#use_register_bytecodes(config, call_func, "abc") +#use_register_bytecodes(config, bisect, tuple(range(100)), 1) +data = list(range(50)); import random; random.shuffle(data); data=tuple(data) +#use_register_bytecodes(config, qsort2, data) +#use_register_bytecodes(config, partition, [1, 2, 3], [], [2], []) +#use_register_bytecodes(config, inplace_add, [1], [2, 3], [4]) +#use_register_bytecodes(config, build_list, [1], [2], [3]) +#use_register_bytecodes(config, make_func) +#use_register_bytecodes(config, yield_value) +#use_register_bytecodes(config, test_func, 3) + +if 1: + bench(config, fact_loop, 1000, loops=100) + bench(config, fact, 800, loops=10**2) + bench(config, pow2, 1000, loops=10**3) + bench(config, loop_sum, 10**5) + bench(config, loop_noop, 10**6) + bench(config, sieve, 10**3) + bench(config, bisect, tuple(range(10**7)), 1, loops=10**4) + data = list(range(700)); import random; random.shuffle(data); data=tuple(data) + bench(config, qsort2, data, loops=10, runs=100) +