Christian Collberg University of Arizona
Obfuscation
- vs.
Deobfuscation
ISSISP 2018
Obfuscation vs. Deobfuscation ISSISP 2018 Christian Collberg - - PowerPoint PPT Presentation
Obfuscation vs. Deobfuscation ISSISP 2018 Christian Collberg University of Arizona 1. Obfuscating Arithmetic 2. Opaque Expressions 3. Control Flow Flattening 4. Constructing Opaque Expressions 5. Generic Deobfuscation 6. Turning up
Christian Collberg University of Arizona
ISSISP 2018
Encoding Integer Arithmetic
Example
One possible encoding of z=x+y+w is z = (((x ^ y) + ((x & y) << 1)) | w) + (((x ^ y) + ((x & y) << 1)) & w); Many others are possible, which is good for diversity.
tigress --Environment=x86_64:Linux:Gcc:4.6\
that it uses a + operator!
Exercise!
int fib(int n ) { ... while (1) { switch (*(_1_fib_$pc[0])) { case PlusA: { (_1_fib_$sp[0] + -1)->_int = (_1_fib_$sp[0] + -1)->_int + (_1_fib_$sp[0] + 0)->_int; break; } }
int fib(int n ) { ... while (1) { switch (*(_1_fib_$pc[0])) { case PlusA: { (_1_fib_$sp[0] + -1)->_int = ((_1_fib_$sp[0] + -1)->_int ^ (_1_fib_$sp[0] + 0)->_int) + (((_1_fib_$sp[0] + -1)->_int & (_1_fib_$sp[0] + 0)->_int) << 1); break; } }
Opaque Expressions An expression whose value is known to you as the defender (at obfuscation time) but which is
difficult for an attacker to figure out
Opaquely indeterminate predicate
P?
FALSE TRUE
PT
TRUE FALSE TRUE
PF
FALSE
Opaquely true/ false predicate An expression
Examples
true false
$2 \mid (x^2 + x)^T$
Opaquely true predicate:
true false
$2 \mid (x^2 + x)^T$
Opaquely indeterminate predicate:
false true
x mod 2 = 0?
if (x[k] == 1) R = (s*y) % n else R = s; s = R*R % n; L = R; if (x[k] == E^{=1}) R = (s*y) % n else R = s; s = R*R % n; L = R;
if (x[k] == 1) R = (s*y) % n else R = s; s = R*R % n; L = R; if (x[k] == 1) R = (s*y) % n else R = s; if (expr^{=T}) s = R*R % n; else s = R*R * n; L = R;
if (x[k] == 1) R = (s*y) % n else R = s; s = R*R % n; L = R; if (x[k] == 1) R = (s*y) % n else R = s; if (expr^{=?}) s = R*R % n; else s = (R%n)*(R%n)%n; L = R;
Arithmetic Encoding
z = (((x ^ y) + ((x & y) << 1)) | w) + (((x ^ y) + ((x & y) << 1)) & w); z=x+y+w z = (((x ^ y) + ((x & y) << E^{=1})) | w) + (((x ^ y) + ((x & y) << E^{=1})) & w);
************************************************************ * 1) Opaque Predicates ************************************************************ tigress --Environment=x86_64:Linux:Gcc:4.6 --Seed=0 \
Exercise!
inside a function.
branch.
flow from A to B.
exit node.
Control-Flow Graph (CFG)
int foo() { printf(“Boo!”); }
ENTRY EXIT
printf(“Boo!”);
int foo() { x=1; y=2; printf(x+y); }
ENTRY EXIT
x=1; y=2; printf(x+y);
int foo() { read(x); if (x>0) printf(x); }
ENTRY EXIT
x=1; if (x>0) goto B2 printf(x)
int foo() { read(x); if (x>0) printf(x); else printf(x+1); }
ENTRY EXIT
x=1; if (x>0) goto B2 printf(x) printf(x+1)
int foo() { x=10; while (x>0){ printf(x); x=x-1; } }
ENTRY EXIT
x=10; if (x<=0) goto B3 printf(x); x=x-1; goto B1;
Flatten
int modexp( int y,int x[], int w,int n){ int R, L; int k=0; int s=1; while (k < w) { if (x[k] == 1) R = (s*y) % n else R = s; s = R*R % n; L = R; k++; } return L; }
if (k<w) if (x[k]==1) s=R*R mod n L = R k++ R=s R=(s*y) mod n s=1 k=0 return L
B6 : B1 : B2 : B5 : goto B1 B4 : B3 : B0 :
int modexp(int y, int x[], int w, int n) { int R, L, k, s; int next=0; for(;;) switch(next) { case 0 : k=0; s=1; next=1; break; case 1 : if (k<w) next=2; else next=6; break; case 2 : if (x[k]==1) next=3; else next=4; break; case 3 : R=(s*y)%n; next=5; break; case 4 : R=s; next=5; break; case 5 : s=R*R%n; L=R; k++; next=1; break; case 6 : return L; } }
next=3 if (k<w) else next=2 next=6 next=5 R=(s*y)%n R=s next=5 S=R*R%n L=R K++ next=1 return L k=0 s=1 next=1 next=0 switch(next) if (x[k]==1) else next=4
B5 B6 B0 B1 B3 B4 B2
Flattening Algorithm
switch
case 0: block_0 case n: block_n
block is a case:
case n: { if (expression) next = … else next = … }
Flatten this CFG:
ENTER EXIT goto B2 B6 X := X − 2; B5 Y := X + 5; B4 X := X−1; A[X] := 10; if X <> 4 goto B6 if x >= 10 goto B4 B2 B3 X := 20; B1
Flatten this CFG! Work with your friends!
int modexp(int y, int x[], int w, int n) { int R, L, k, s; int next=E^{=0}; for(;;) switch(next) { case 0: k=0; s=1; next=E^{=1}; break; case 1: if (k<w) next=E^{=2}; else next=E^{=6}; break; case 2: if (x[k]==1) next=E^{=3}; else next=E^{=4}; break; case 3: R=(s*y)%n; next=E^{=5}; break; case 4: R=s; next=E^{=5}; break; case 5: s=R*R%n; L=R; k++; next=E^{=1}; break; case 6: return L; } }
next=E^{=1}
Opaque Values
Opaque values from array aliasing
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 36 58 1 46 23 5 16 65 2 41 2 7 1 37 11 16 2 21 16
Invariants:
Invariants:
respectively.
int modexp(int y, int x[], int w, int n) { int R, L, k, s; int next=0; int g[] = {10,9,2,5,3}; for(;;) switch(next) { case 0 : k=0; s=1; next=(g[0]%g[1])^{=1}; break; case 1 : if (k<w) next=(g[g[2]])^{=2}; else next=(g[0]-2*g[2])^{=6}; break; case 2 : if (x[k]==1) next=(g[3]-g[2])^{=3}; else next=(2*g[2])^{=4}; break; case 3 : R=(s*y)%n; next=(g[4]+g[2])^{=5}; break; case 4 : R=s; next=(g[0]-g[3])^{=5}; break; case 5 : s=R*R%n; L=R; k++; next=(g[g[4]]%g[2])^{=1}; break; case 6 : return L; } }
Yadegari, et al., A Generic Approach to Deobfuscation. IEEE S&P’15
INPUT OUTPUT
TRACE ADD SUB BRA SHL CALL DIV XOR TRACE’ ADD BRA DIV XOR
Dynamic Analysis
P(){ }
PUSH() POP()
ADD SUB BRA SHL CALL DIV XOR ADD BRA SHL DIV XOR ADD
✓
SUB BRA
✓
SHL
✓
CALL DIV
✓
XOR
✓
Backward Taint Analysis (contribute to output)
ADD
✓
SUB BRA
✓
SHL
✓
CALL DIV XOR
Forward Taint Analysis (depend on input)
ADD BRA DIV XOR
Compiler Optimizations
ADD
✓
SUB BRA
✓
SHL
✓
CALL DIV
✓
XOR
✓c←b;
ADD BRA SHL DIV PRINT
P(){ } Not input dependent!
sub add call print
VPC
ADD SUB BRA SHL CALL DIV PRINT ADD BRA DIV PRINT ADD
✓
SUB BRA
✓
SHL
✓
CALL DIV
✓
✓
P(){ }
STACK: SP
int modexp(int y, int x[], int w, int n) { int R, L, k, s; int next=E^{=0}; for(;;) switch(next) { case 0: k=0; s=1; next=E^{=1}; break; case 1: if (k<w) next=E^{=2}; else next=E^{=6}; break; case 2: if (x[k]==1) next=E^{=3}; else next=E^{=4}; break; case 3: R=(s*y)%n; next=E^{=5}; break; case 4: R=s; next=E^{=5}; break; case 5: s=R*R%n; L=R; k++; next=E^{=1}; break; case 6: return L; } }
next=E^{=1}
Not input dependent!
DEC( ) DEC( ) DEC( ) ENC( ) DEC( )
Not input dependent!
Anti-Dynamic Analysis
SUB BRA SHL CALL DIV LEA BLE SLA BRA TEST LEA JREG XOR JMP FDIV CALL LEA MUL JMP BLE SRL CALL BOR STO BRA AND CALL FADD CALL JMP TEST LEA BLE ADD SUB BAND FADD BRLE FDIV JREG BRLE SRA FADD LD STO FSUB CALL BRA LD SEXT CALL BOR JMP
main(){ } if (traced) abort();
Pawlowski, et al., Probfuscation: An Obfuscation Approach …
Overhead Protection
Overhead Protection Resources Precision Resources Precision
ADD SUB BRA SHL CALL DIV XOR ADD BRA SHL DIV XOR ADD
✓
SUB BRA
✓
SHL
✓
CALL DIV
✓
XOR
✓
ADD
✓
SUB BRA
✓
SHL
✓
CALL DIV XOR
Taint Analysis
ADD BRA DIV XOR ADD
✓
SUB BRA
✓
SHL
✓
CALL DIV
✓
XOR
✓
but control dependence
no data dependence!
😅 ☹
Implicit Flow
bit value; void handler(…) { value ← 1 } main(){ value ← 0 signal(31,handler) if (b == 1) raise(31) a ← value }
still control dependence
☹
no data dependence!
😅
b
signal(){ … } I/O
bit value; void handler(…) { value ← 1 } main(){ value ← 0 signal(31,handler) if (b == 1) raise(31) a ← value } b b b b
signal() read() write() I/O libc GC pthread jitting
main(){ a ← b } b
Program Analysis
Visible State Invisible State
a
leak into invisible state ← b retrieve into visible state ←
Stephens, et al., Probabilistic Obfuscation through Covert Channels, Euro S&P 2018
start ← clock() a ← (clock()-start) > 𝜺 if (b == 1) else
“a depends
Program Analysis
no data or control dependence!
😅
start ← clock() if (b == 1) else a ← (clock()-start) > 𝜺 for(i=0; i<n; i++); for(i=0; i<m; i++);
signal() read() write() I/O libc GC pthreads jitting
start ← clock() if (b == 1) else a ← (clock()-start) > 𝜺
start ← clock() a ← (clock()-start) > 𝜺 for (i=0; i<n; i++) if (b == 1) flush ; read ; write else flush ; read ; write
buf1 buf1 buf2 buf1 buf2 buf1
if (b == 1)
else
start ← clock() read(“foo”) a ← (clock()-start) > 𝜺
if (b == 1) ; else foo()
Jitted binary code foo foo bar baz Bytecode
start ← clock() foo() a ← (clock()-start) > 𝜺
GC() if (b == 1) list ← else list ←
…
start ← clock() GC() a ← (clock()-start) > 𝜺
start ← clock() if (b == 1) else a ← (clock()-start )> 𝜺
☐ ← time() if (☐) ☐ else ☐ ☐ ← (time()-☐)>☐
Pattern Analysis
Issue #1: Pattern-Matching Attacks
start ← clock() flush if (b == 1) read write else read write a ← (clock()-start) > 𝜺
Issue #2: Correctness
tigress.cs.arizona.edu
Merge Split Dynamic Jitting
Encode Arithmetic Encode Data Opaque Predicates Encode Literals Branch Functions
Flatten
NEXT
Virtualize
≈
Anti Taint Analysis
main(){ x←E(“secret”) }
Exfiltrate!
“secret”!
main(){ a ← b }
main(){ a ← b }
read CPU temperature speed up/ slow down
main(){ a ← b }
listen to hard drive speed up/ slow down
main(){ a ← b }
read from camera flash to screen
main(){ a ← b }
read from microphone play sound
main(){ a ← b }
main(){ a ← b }
Disassemble
01101010 01010101 00001110
machine code
int foo() { … … … … } add r1,r2,r3 ld r2,[r3] call bar cmp r1,r4 bgt L2
Compile
55 48 89 e5 48 83 c7 68 48 83 c6 68 5d e9 26 38 00 00 55 48 89 e5 48 89 e5 48 8d 46 68 48 89 c7 5d e9 11 38 00 00 55
disassemblers get it wrong!
case: program analysis is more or less precise.
Disassemble
01101010 01010101 00001110 add r1,r2,r3 ld r2,[R4] call bar mul r1,r4 bgt L2
55 48 89 e5 48 83 c7 68 48 83 c6 68 5d e9 26 38 00 00 55
Linear sweep disassembly
55 48 89 e5 48 83 c7 68 48 83 c6 68 5d e9 26 38 00 00 55
Recursive traversal disassembly
0x45b0 0xd8a 0xd78
Stack
7.0xd8b: mov %rdi,%rbp
Indirect jump!
Exercise!
disassembly handle this code?
if (PF) asm(“.byte 0x55 0x23 0xff…”);
common in malware.
Insert Bogus Dead Code
uint32 Skypes_hash_function () { addr_t addr =(addr_t)((uint32)addr ^(uint32)addr); addr = (addr_t)((uint32) addr + 0x688E5C); uint32 hash = 0x320E83 ^ 0x1C4C4 ; int bound = hash + 0xFFCC5AFD ; do { uint32 data =*((addr_t)((uint32)addr + 0x10)); goto b1; asm volatile (".byte 0x19"); b1: hash = hash ⊕ data ; addr -= 1; bound --; } while (bound !=0); goto b2; asm volatile (".byte 0x73"); b2: goto b3; asm volatile (".word 0xC8528417,…"); b3: hash -= 0x4C49F346; return hash; }
jmp b … … … a: Branch Functions call bf a: void branchFun(){ } r = ret_addr(); return to (r+α); return;
… … …
.byte 42,…
************************************************************ * 7) Branch Functions ************************************************************ tigress --Environment=x86_64:Linux:Gcc:4.6 \
Exercise!
Tamperproofing has to do two things:
Essentially: but this is too unstealthy! if (tampering-detected()) respond-to-tampering()
int foo () { if (today > “Aug 17,2016”){ printf(“License expired!”); abort; } } int foo () { if (false){ printf(“License expired!”); abort; } } int foo () { if (today > “Aug 17,2016”){ printf(“License expired!”); abort; } }
check(){ if (hash(foo)!=42) abort() }
if (foo-has-changed-in-any-way())
Checker1
int foo() { … … … … } int foo_copy() { … … … … }
copy Repair foo!!! int foo() { … … … … } int foo_copy() { … … … … }
if (foo-has-changed-in-any-way())
Checker1
int foo() { … … … … } int foo_copy() { … … … … }
copy
Repair Checker1!!
if (checker1-has-changed())
Checker2
if (foo-changed())
Checker1
foo_copy copy foo
if (foo-changed())
Checker1_copy
foo_copy copy foo
copy
Checker Checker Checker Repair Repair Repair Code block Code block Code block
Secure Hardware
Crypto-Based Obfuscation
≈
Language-Based Obfuscation
≈
Language-Based Obfuscation
Secure Hardware
Crypto-Based Obfuscation
Program Generate Run 2-bit multiplier $10^{27}$ years $10^{8}$ years 16-bit point function 7 hours, 25G 4 hours (later, 20 minutes)
≈
Updatable Security Strong Root of Trust Security guarantees for small security kernels
Problem #2: Combine Protections
Supported by NSF CNS-1525820
tigress.cs.arizona.edu collberg.cs.arizona.edu
Thanks to my collaborators: Debray, Stephens, Yadegari, Scheidegger, Levine