Detection of Metamorphic and Virtualization-based Malware using Algebraic Specification — Maude Specification

Matt Webster and Grant Malcolm

Instructions

To use the specification, save the specification below in a file called i64.maude, execute Maude and type in i64.

The proofs from Examples 1 and 2 can be found below. To run the proofs, save using the file names below and type in proof-ex1 and in proof-ex2.

The search from Figure 2 can be run by downloading the file below and typing in i64-2.

If you require assistance with the Maude specification given on this page, please contact Matt Webster.

Maude Specification

File: i64.maude.

*** i64.maude
*** Specification of a subset of the Intel 64 instruction set:
*** {MOV, ADD, SUB, TEST, XOR, AND, OR, PUSH, POP, NOP}.
*** By Matt Webster, 2006-8. 
*** Used with Maude 2.3 built: Feb 14 2007 17:53:50



*** This module defines the syntax of a subset of I-64.
mod I-64-SYNTAX is 
	protecting INT .
	sorts Variable Expression Stack EInt .
	sorts Instruction InstructionSequence .
	subsort Instruction < InstructionSequence .
	subsorts Variable EInt < Expression .
	subsort Int < EInt .
	
	op dadd_,_ : Variable Expression -> Instruction [prec 20] .
	op dsub_,_ : Variable Expression -> Instruction [prec 20] .	
	
	*** I-64 instructions	
	op mov_,_ : Variable Expression -> Instruction [prec 20] .
	op add_,_ : Variable Expression -> Instruction [prec 20] .
	op sub_,_ : Variable Expression -> Instruction [prec 20] .
	op nop : -> Instruction .
	op push_ : Expression -> Instruction [prec 20] .
	op pop_ : Variable -> Instruction [prec 20] .	
	op and_,_ : Variable Expression -> Instruction [prec 20] .
	op or_,_ : Variable Expression -> Instruction [prec 20] .
	op xor_,_ : Variable Expression -> Instruction [prec 20] .
	op test_,_ : Variable Expression -> Instruction [prec 20] .
	*** helper operations
	op stackPush : Expression Stack -> Stack .
	op stackPop : Stack -> Stack .
	op stackTop : Stack -> EInt .	
	op _next_ : EInt Stack -> Stack [prec 15] .
	op stackBase : -> Stack .
	op msb : EInt -> EInt .
	op isZero : Expression -> EInt .	
	op isZero : EInt -> EInt .
	op parity : EInt -> EInt .
	*** error messages
	op emptyStackError1 : -> Stack .
	op emptyStackError2 : -> EInt .
	*** I-64 registers
	ops eax ebx ecx edx ebp esp esi edi ip : -> Variable .
	*** I-64 EFLAGS register
	ops cf of sf af zf pf : -> Variable .
	*** equality operation 
	op _is_ : EInt EInt -> Bool .
	op _is_ : Stack Stack -> Bool .
	*** extending the Int sort to include "undef"
	op undef : -> EInt .
	
	
	*** overloaded Boolean operations - these should be in proof scripts
	*** but OBJ3 v.2.0 won't allow it
	op _band_ : EInt EInt -> EInt [prec 35] .
	op _bor_ : EInt EInt -> EInt [prec 35] .
endm

*** This module defines the semantics of the I-64 instructions
*** whose syntax is defined in I-64-SYNTAX.
mod I-64-SEMANTICS is
	protecting I-64-SYNTAX .
	sort Store .

	*** stores
	ops s : -> Store .
	op initial : -> Store .

	*** operators for defining the semantics of I-64
	op _[[_]] : Store Expression -> EInt [prec 30] .
	op _[[stack]] : Store -> Stack [prec 30] .	
	op _;_ : Store Instruction -> Store [prec 25] .
 	op _;_ : InstructionSequence InstructionSequence -> InstructionSequence [gather (E e) prec 26] .
 	
	*** variables for rewriting rules
	vars S S1 S2 S3 : Store .
	vars I I1 I2 I3 : EInt .
	vars INT INT1 INT2 : Int .
	vars V V1 V2 V3 : Variable .
	vars E E1 E2 E3 : Expression .
	vars ST ST1 ST2 : Stack .
 	vars P1 P2 : InstructionSequence . 

	*** evaluation of instruction sequences
 	eq S ; (P1 ; P2 ) = (S ; P1) ; P2 .

	*** _is_ semantics
	eq I1 is I2 = (I1 == I2) .
	eq ST1 is ST2 = (ST1 == ST2) .
	*** the value of any integer in a store is the integer itself
	eq S[[I]] = I .		
	*** initial values of variables and the stack
	eq initial[[stack]] = stackBase .
	ceq initial[[V]] = undef 
		if V =/= ip .
	eq initial[[ip]] = 0 .
	
	*** Axioms to deal with static analysis of primitive 
	*** operators such as +, -, |, &, xor .
	eq I | I = I .
	eq I & I = I .
	eq (I1 + I2) is (I3 + I2) = I1 is I3 .	
	eq (I1 + I2) is (I1 + I2) = true .	
	eq (I1 - I2) is (I1 - I2) = true .	
	eq (I1 | I2) is (I1 | I2) = true .
	eq (I & S1[[V1]]) is (I & S2[[V2]]) = S1[[V1]] is S2[[V2]] .	
	eq isZero(I1 & I2) is isZero(I1 & I2) = true .
	eq parity(I1 & I2) is parity(I1 & I2) = true .		
	eq msb(I1 & I2) is msb(I1 & I2) = true .
	eq isZero(I1 | I2) is isZero(I1 | I2) = true .
	eq parity(I1 | I2) is parity(I1 | I2) = true .		
	eq msb(I1 xor I2) is msb(I1 xor I2) = true .
	eq isZero(I1 xor I2) is isZero(I1 xor I2) = true .
	eq parity(I1 xor I2) is parity(I1 xor I2) = true .
	eq msb(I1 | I2) is msb(I1 | I2) = true .
	eq (I1 xor I2) is (I1 xor I2) = true .
	
	
	*** I-64 instruction semantics	
	eq S ; and V,E [[V]] = S[[V]] & S[[E]] .
	ceq S ; and V1,E [[V2]] = S[[V2]]
		if V1 =/= V2 and V2 =/= ip and V2 =/= sf and V2 =/= zf 
		and V2 =/= pf and V2 =/= cf and V2 =/= of .
	eq S ; and V,E [[stack]] = S[[stack]] .		
	eq S ; and V,E [[ip]] = S[[ip]] + 1 .
	eq S ; and V,E [[sf]] = msb( S[[V]] & S[[E]] ) .
	eq S ; and V,E [[zf]] = isZero( S[[V]] & S[[E]] ) .
	eq S ; and V,E [[pf]] = parity( S[[V]] & S[[E]] ) .
	eq S ; and V,E [[cf]] = 0 .
	eq S ; and V,E [[of]] = 0 .	
																		
	eq S ; or V,E [[V]] = S[[V]] | S[[E]] .
	ceq S ; or V1,E [[V2]] = S[[V2]]
		if V1 =/= V2 and V2 =/= ip and V2 =/= sf and V2 =/= zf 
		and V2 =/= pf and V2 =/= cf and V2 =/= of .
	eq S ; or V,E [[stack]] = S[[stack]] .		
	eq S ; or V,E [[ip]] = S[[ip]] + 1 .	
	eq S ; or V,E [[sf]] = msb( S[[V]] | S[[E]] ) .
	eq S ; or V,E [[zf]] = isZero( S[[V]] | S[[E]] ) .
	eq S ; or V,E [[pf]] = parity( S[[V]] | S[[E]] ) .
	eq S ; or V,E [[cf]] = 0 .
	eq S ; or V,E [[of]] = 0 .	

	eq S ; xor V,E [[V]] = S[[V]] xor S[[E]] .
	ceq S ; xor V1,E [[V2]] = S[[V2]]
		if V1 =/= V2 and V2 =/= ip and V2 =/= sf and V2 =/= zf 
		and V2 =/= pf and V2 =/= cf and V2 =/= of .
	eq S ; xor V,E [[stack]] = S[[stack]] .	
	eq S ; xor V,E [[ip]] = S[[ip]] + 1 .
	eq S ; xor V,E [[sf]] = msb( S[[V]] xor S[[E]] ) .
	eq S ; xor V,E [[zf]] = isZero( S[[V]] xor S[[E]] ) .
	eq S ; xor V,E [[pf]] = parity( S[[V]] xor S[[E]] ) .
	eq S ; xor V,E [[cf]] = 0 .
	eq S ; xor V,E [[of]] = 0 .	
									
	eq S ; test V,E [[V]] = S[[V]] .
	ceq S ; test V1,E [[V2]] = S[[V2]] 
		if V2 =/= ip and V2 =/= sf and V2 =/= zf 
		and V2 =/= pf and V2 =/= cf and V2 =/= of .
	eq S ; test V,E [[stack]] = S[[stack]] .		
	eq S ; test V,E [[ip]] = S[[ip]] + 1 .
	eq S ; test V,E [[sf]] = msb( S[[V]] & S[[E]] ) .
	eq S ; test V,E [[zf]] = isZero( S[[V]] & S[[E]] ) .
	eq S ; test V,E [[pf]] = parity( S[[V]] & S[[E]] ) .
	eq S ; test V,E [[cf]] = 0 .
	eq S ; test V,E [[of]] = 0 .	

	eq S ; mov V,E [[V]] = S[[E]] .
	ceq S ; mov V1,E [[V2]] = S[[V2]] 
		if V1 =/= V2 and V2 =/= ip .	
	eq S ; mov V,E [[stack]] = S[[stack]] .
	eq S ; mov V,E [[ip]] = S[[ip]] + 1 .
	
	eq S ; add V,E [[V]] = (S[[V]] + S[[E]]) .
	ceq S ; add V1, E [[V2]] = S[[V2]]
		if V1 =/= V2 and V2 =/= ip .
	eq S ; add V,E [[stack]] = S[[stack]] .		
	eq S ; add V,E [[ip]] = S[[ip]] + 1 .
	
	*** special version of add ("dynamic add") that keeps
	*** results of additions within I-64 limits (2^32-1).
	eq S ; dadd V,E [[V]] = (S[[V]] + S[[E]]) & 4294967295 .
	ceq S ; dadd V1, E [[V2]] = S[[V2]]
		if V1 =/= V2 and V2 =/= ip .
	eq S ; dadd V,E [[stack]] = S[[stack]] .		
	eq S ; dadd V,E [[ip]] = S[[ip]] + 1 .

	eq S ; sub V,E [[V]] = (S[[V]] - S[[E]]) .
	ceq S ; sub V1, E [[V2]] = S[[V2]]
		if V1 =/= V2 and V2 =/= ip .
	eq S ; sub V,E [[stack]] = S[[stack]] .		
	eq S ; sub V,E [[ip]] = S[[ip]] + 1 .
	
	*** special version of add ("dynamic sub") that keeps
	*** results of additions within I-64 limits (2^32-1).
	eq S ; dsub V,E [[V]] = (S[[V]] - S[[E]]) & 4294967295 .
	ceq S ; dsub V1, E [[V2]] = S[[V2]]
		if V1 =/= V2 and V2 =/= ip .
	eq S ; dsub V,E [[stack]] = S[[stack]] .		
	eq S ; dsub V,E [[ip]] = S[[ip]] + 1 .
		
	eq S ; push E [[stack]] = stackPush(S[[E]],S[[stack]]) .
	ceq S ; push E [[V]] = S[[V]] 
		if V =/= ip .	
	eq S ; push E [[ip]] = S[[ip]] + 1 .	

	eq S ; pop V [[stack]] = stackPop(S[[stack]]) .
	eq S ; pop V [[V]] = stackTop(S[[stack]]) .
	ceq S ; pop V1 [[V2]] = S[[V2]] 
		if V1 =/= V2 and V2 =/= ip .	
	eq S ; pop V [[ip]] = S[[ip]] + 1 .	
	
	ceq S ; nop [[V]] = S[[V]] 
		if V =/= ip .
	eq S ; nop [[stack]] = S[[stack]] .
	eq S ; nop [[ip]] = S[[ip]] + 1 .

	*** Stack helper operations semantics
	eq stackPush(I,ST) = I next ST . 
	eq stackPop(I next ST) = ST .
	eq stackPop(stackBase) = emptyStackError1 .
	eq stackTop(I next ST) = I .
	eq stackTop(stackBase) = emptyStackError2 .	
endm

Example 1

File: proof-ex1.maude.

*** Matt Webster, January 2008.

*** Proof script for virus Win9x.Zmorph.A code fragments.
*** Read in file sta.obj first.

fmod IA-32-PROOF is
	pr IA-32-SEMANTICS .
	ops g h psi : Store -> Store .
	var S : Store . 

	*** Allomorph 1 -vs- Allomorph 2
	eq g(S) = 	S ; mov edi, 2580774443 ;
										mov ebx, 467750807 ;
										sub ebx, 1745609157 ;
										sub edi, 150468176 ;
										xor ebx, 875205167 ;
										push edi ;
										xor edi, 3761393434 ;
										push ebx ;       
										push edi .
	eq h(S)	=	S ;	mov ebx, 535699961 ;
										mov edx, 1490897411 ;
										xor ebx, 2402657826 ;
										mov ecx, 3802877865 ;
										xor edx, 3743593982 ;
										add ecx, 2386458904 ;
										push ebx ;
										push edx ;       
										push ecx .

	eq psi(S)	=	S ;	mov edi, 0 ;
										mov ebx, 0 ;
										mov ecx, 0 ;
										mov edx, 0 .									
endfm									


*** Proof: equivalence modulo the stack 

*** These should be true
red g(s)[[stack]] is h(s)[[stack]] .
red g(s)[[ip]] is h(s)[[ip]] .

*** QED

red g(s)[[stack]] .

red psi(g(s))[[stack]] is psi(h(s))[[stack]] .
red psi(g(s))[[ip]] is psi(h(s))[[ip]] .
red psi(g(s))[[edi]] is psi(h(s))[[edi]] .
red psi(g(s))[[ebx]] is psi(h(s))[[ebx]] .
red psi(g(s))[[ecx]] is psi(h(s))[[ecx]] .
red psi(g(s))[[edx]] is psi(h(s))[[edx]] .

Example 2

File: proof-ex2.maude.

*** Matt Webster, January 2008.

*** Proof script for virus Win9x.Zmorph.A code fragments.
*** Read in file sta.obj first.

fmod IA-32-PROOF is
	pr IA-32-SEMANTICS .
	ops g h psi' : Store -> Store .
	var S : Store . 

	*** Allomorph 1 -vs- Allomorph 2
	eq g(S) = 	S ; mov edi, 2580774443 ;
										mov ebx, 467750807 ;
										sub ebx, 1745609157 ;
										sub edi, 150468176 ;
										xor ebx, 875205167 ;
										push edi ;
										xor edi, 3761393434 ;
										push ebx ;       
										push edi .
	eq h(S)	=	S ;	mov ebx, 535699961 ;
										mov edx, 1490897411 ;
										xor ebx, 2402657826 ;
										mov ecx, 3802877865 ;
										xor edx, 3743593982 ;
										add ecx, 2386458904 ;
										push ebx ;
										push edx ;       
										push ecx .

	eq psi'(S)	=	S ;	pop edi ;
										pop ebx ;
										pop ecx ;
										mov edx, ecx .								
endfm									


*** Proof: equivalence modulo the stack 

*** These should be true
red g(s)[[stack]] is h(s)[[stack]] .
red g(s)[[ip]] is h(s)[[ip]] .

*** QED

red g(s)[[stack]] .

red psi'(g(s))[[stack]] is psi'(h(s))[[stack]] .
red psi'(g(s))[[ip]] is psi'(h(s))[[ip]] .
red psi'(g(s))[[edi]] is psi'(h(s))[[edi]] .
red psi'(g(s))[[ebx]] is psi'(h(s))[[ebx]] .
red psi'(g(s))[[ecx]] is psi'(h(s))[[ecx]] .
red psi'(g(s))[[edx]] is psi'(h(s))[[edx]] .

Automatic Generation of Metamorphic Code

The following Maude module defines the rewriting space in Figure 2.
mod METAMORPHIC is
	pr I-64-SEMANTICS .
	
	op "sidt" : -> EInt . *** "sidt" is a special integer
	
	op s : -> Store .
	
	var S : Store .
	
	rl [1] : S => S ; mov ebx, "sidt" .
	rl [2] : S => S ; mov eax, ebx .
	rl [3] : S => S ; mov ecx, ebx .
	rl [4] : S => S ; mov eax, ecx .

endm
Executing the following search command will generate 1000 different programs that satisfy the condition of putting "sidt" in the eax register:
search [1000] in METAMORPHIC :
  S =>+ S such that S[[eax]] is "sidt" .
A Maude file containing this search is available: i64-2.maude.