3. Scratch pad

 

"In order to make an apple pie from scratch, you must first create the universe."

 Carl Sagan, Cosmos

This chapter is an incoherent collection of platform independent items that found no other place.

3.1. objdump_format.pl

Used at objdump -d[ABC].

Note that the strings ${TEVWH_ASM_COMMENT} and ${TEVWH_ASM_RETURN} are substituted by a sed script. perl never sees the "${". See Variables and packages[ABC] for details and actual values.

TEVWH_ASM_RETURN is a regular expression in perl syntax. Unfortunately plain sed on FreeBSD 4.7 has no "\|" or anything equivalent while option -E switches to modern regular expressions with incompatible syntax. The matter is further complicated by branch delay slots. On Sparc the instruction following a ret is executed while the jump is under way. A typical instruction to put there is restore. But triggering on that is not a clean solution.

Source: src/magic_elf/objdump_format.pl
#!/usr/bin/perl -sw

# Reformat the output of "objdump -d" into aligned columns.
#
# Input is read from the files named on the command line or from stdin.
# Lines in front of the start address are dropped. From the start address
# on, every instruction is printed as address, hex dump, mnemonic, operands
# and (if present) comment. Output stops after the first return instruction
# or at the first line that does not look like an instruction.
# If the start address is never found, the input is echoed unchanged.
#
# Switch (parsed by perl's -s):
#   -start_address=REGEX  address to start at (default: any hex number)
#
# The TEVWH_ASM_COMMENT and TEVWH_ASM_RETURN place holders below are
# filled in by a sed script before perl ever sees this file.

# Perl 5.005_03 (part of FreeBSD 4.7) does not have [:xdigit:]
$::start_address='[0-9a-fA-F]+' if (!defined($::start_address));

# skip to start address
my $log = '';
while(1)
{
  # test with defined(); a final line "0" without newline is false in perl
  if (!defined($_ = <>)) { print $log; exit 0; }
  last if m/^\s*$::start_address:/;
  $log .= $_;
}
for(;;)
{
  s/\s+$//;
  # cut off the comment first, it would confuse the split below
  my $comment = s/\s+(${TEVWH_ASM_COMMENT})\s*(.*)// ? "$1 $2" : '';

  my ( $addr, $hexdump, $asm ) = split(/ *\t/);
  last if (!defined($asm)); # also catches the "..."-lines
  # align mnemonic and operands; instructions without operands stay as is
  $asm = sprintf('%-7s %s', $1, $2) if ($asm =~ m/^(\S+)\s+(.*)/);
  my $line = sprintf("%-11s %-19s %s", $addr, $hexdump, $asm);

  # pad the instruction to 59 columns so that comments line up
  $line = sprintf("%-59s %s", $line, $comment) if (length($comment) > 0);
  print $line . "\n";

  last if ($asm =~ m/\b${TEVWH_ASM_RETURN}\b/);
  last if (!defined($_ = <>));
}

3.2. gdb_format.pl

Used at GDB to the rescue[ABC] and The entry point[ABC].

Source: src/magic_elf/gdb_format.pl
#!/usr/bin/perl -nw
# Executed once per input line (perl -n). A line of the form
# "location: opcode operands" is printed in aligned columns, any other line
# is dropped. The script exits after the first return instruction.
# The TEVWH_ASM_COMMENT and TEVWH_ASM_RETURN place holders are filled in by
# a sed script before perl runs this file.
if (my ($location, $opcode, $operands) = m/([^:]+):\s+(\S+)\s+(.*)/)
{
  # %-30s to cover "0x804c1c0 <__libc_write+32>:"
  printf "%-30s%-13s ", $location . ':', $opcode;

  # a trailing comment is split off and printed in a column of its own
  if ($operands =~ s/\s+${TEVWH_ASM_COMMENT}\s*(.*)//)
  {
    printf "%-20s${TEVWH_ASM_COMMENT} %s\n", $operands, $1;
  }
  else
  {
    print $operands, "\n";
  }

  exit(0) if ($opcode =~ m/${TEVWH_ASM_RETURN}/);
}

3.3. Offset of e_entry

Output is at Offset of e_entry[ABC]. Variables prefixed with TEVWH_[ABC] might also be interesting.

Source: src/evil_magic/ofs_entry.c
#include <stddef.h>
#include <stdio.h>
#include <elf.h>

#include <config.h>

/* necessary to dereference TEVWH_ELF_xxx: an argument handed on to another
 * macro is macro-expanded first, only then does # turn it into a string */
#define QUOTE(n)	#n

/* Print "sizeof_<type>=<size of type in bytes>"; yields printf's result. */
#define SIZEOF_TYPE(type) \
   printf("sizeof_" QUOTE(type) "=%lu\n", (unsigned long)sizeof(type))

/* Print "offset_<member>=<byte offset of member within type>". */
#define OFFSETOF(type, member) \
  printf("offset_" QUOTE(member) "=%lu\n", \
    (unsigned long)offsetof(type, member))

/* Print "sizeof_<member>=<size of that member in bytes>".
 * sizeof does not evaluate its operand, so the null pointer is never
 * dereferenced. */
#define SIZEOF_MEMBER(type, member) \
   printf("sizeof_" QUOTE(member) "=%lu\n", \
     (unsigned long)sizeof(((type*)0)->member))

/*
 * Writes one "name=value" line per measurement to stdout. All printing is
 * done by the SIZEOF_TYPE, OFFSETOF and SIZEOF_MEMBER macros.
 */
int main(void)
{
  /* basic C types */
  SIZEOF_TYPE(int);
  SIZEOF_TYPE(long);
  SIZEOF_TYPE(size_t);

  /* ELF structures; the TEVWH_ELF_xxx names are not defined in this file,
   * presumably they come from config.h */
  SIZEOF_TYPE(TEVWH_ELF_EHDR);
  SIZEOF_TYPE(TEVWH_ELF_SHDR);
  SIZEOF_TYPE(TEVWH_ELF_PHDR);

  /* position and size of two members of the ELF header */
  OFFSETOF(TEVWH_ELF_EHDR, e_entry);
  SIZEOF_MEMBER(TEVWH_ELF_EHDR, e_entry);
  OFFSETOF(TEVWH_ELF_EHDR, e_phoff);
  SIZEOF_MEMBER(TEVWH_ELF_EHDR, e_phoff);

  return 0;
}

3.4. Extracting e_entry

Described at Extracting e_entry[ABC], used at The entry point[ABC].

Source: src/evil_magic/e_entry.c
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <elf.h>

#include <config.h>

/*
 * For every file named on the command line, read the ELF header and print
 * the entry point (hexadecimal) followed by its distance from
 * TEVWH_ELF_BASE (decimal). Files that cannot be opened, or that are
 * shorter than an ELF header, are skipped silently.
 * Always returns 0.
 */
int main(int argc, char** argv)
{
  const char* p;
  TEVWH_ELF_EHDR ehdr;

  if (argc < 1) /* no argv[0], so there is nothing we may step over */
    return 0;

  /* argv is terminated by a null pointer; ++argv skips the program name */
  while(0 != (p = *++argv))
  {
    int fd = open(p, O_RDONLY);
    if (fd == -1)
      continue;

    if ((ssize_t)sizeof(ehdr) == read(fd, &ehdr, sizeof(ehdr)))
    { /* print both entry point and offset */
      /* lower case is required to match with objdump's disassembly */
      /* the casts make the arguments match %lx and %lu on every platform */
      printf("%lx %lu\n",
        (unsigned long)ehdr.e_entry,
        (unsigned long)(ehdr.e_entry - TEVWH_ELF_BASE));
    }
    close(fd); /* do not leak one descriptor per file */
  }
  return 0;
}

3.5. Dressing up binary code

Used at Devil in disguise[ABC] to convert binary files into valid C code, i.e. the definition of a byte array. This started as a small filter written in perl, but now it has a lot of features. We need to process the output of both ndisasm and objdump, on multiple platforms. Examples for valid input (i386, sparc, alpha):

08048080  6A04              push byte +0x4
10074:	82 10 20 04	mov	4, %g1
1200000b0:	02 00 bb 27	ldah	gp,2(t12)

The __attribute__ clause is explained in A section called .text[ABC].

Initializing the array with string literals (looking like \xDE\xAD\xBE\xEF) is easier. The terminating zero would not work with Doing it in C[ABC], however. But then using a list of hexadecimal numbers introduces separating commas, requiring special treatment of the last line.

If command line option -last_line_is_ofs is passed to the program then the last line of disassembly is meant to specify an offset into the code. Actually it's just the last byte of that line. You are free to use any dummy operation, see the example input above. A real world example is at Infection #1[ABC]. The last instruction itself is not emitted to the byte array. Instead enum constant ENTRY_POINT_OFS is defined.

Source: src/platform/disasm.pl
#!/usr/bin/perl -sw
use strict;

# Convert a disassembly listing (output of ndisasm or objdump, read from the
# files named on the command line or from stdin) into C source code: the
# definition of a byte array holding the machine code, with the disassembly
# as comment next to the bytes.
#
# Switches (parsed by perl's -s):
#   -identifier=NAME   name of the array (default: main)
#   -size=N            text written between the [] (default: empty)
#   -align=N           argument of aligned() (default: 8)
#   -section=NAME      argument of section() (default: .text)
#   -last_line_is_ofs  the last line of input is not emitted to the array;
#                      its last byte is written as enum constant
#                      ENTRY_POINT_OFS instead
#
# Dies without writing anything if the input holds no instruction to emit.

my $LINE = "  %-30s /* %-32s */\n";

$::identifier = 'main' if (!defined($::identifier));
$::size = '' if (!defined($::size));
$::align = '8' if (!defined($::align));
$::section = '.text' if (!defined($::section));

# One entry per instruction: [ text of the comment, hex digits, initializers ]
my @line;
while(<>)
{
  s/^\s+//;		# trim leading white space
  s/\s+$//;		# trim trailing white space
  s/\s+[!;].*//;	# trim trailing comments
  next if ($_ eq '');	# a blank line holds no instruction

  my $addr = (split(/[:\s]+/))[0];
  s/[A-Fa-f0-9]+:?\s+//;

  # the hex dump is separated from the instruction by two or more blanks
  my @code = split(/\s\s+/);
  my $code = $code[0];
  $code =~ s/\s//g;	# make objdump look like ndisasm

  # two hex digits make one byte of the initializer list
  my $dump = join(',', map { '0x' . $_ } ($code =~ m/(..?)/g));
  push @line, [ $addr . ': ' . join(' ', @code[1..$#code]), $code, $dump ];
}

# The offset marker is not part of the array, so take it off the list
# before anything is counted or printed.
my $marker;
if (defined($::last_line_is_ofs))
{
  $marker = pop @line;
  die "$0: -last_line_is_ofs requires at least one line of input\n"
    if (!defined($marker));
}
die "$0: no instructions to emit\n" if (!@line);

# Count only the bytes that actually end up in the array.
my $code_size = 0;
foreach my $entry (@line)
{
  $code_size += length($entry->[1]) / 2;
}

printf "const unsigned char %s[%s]\n", $::identifier, $::size;
print "__attribute__ (( aligned($::align), section(\"$::section\") )) =\n";
print "{\n";

# every line but the last one is followed by a separating comma
my $last = pop @line;
foreach my $entry (@line)
{
  printf $LINE, $entry->[2] . ',', $entry->[0];
}
printf $LINE, $last->[2], $last->[0];
printf "}; /* %d bytes (%#x) */\n", $code_size, $code_size;

if (defined($::last_line_is_ofs))
{
  # the offset is the last byte of the marker instruction
  my $ofs = substr($marker->[1], -2, 2);
  printf "enum { ENTRY_POINT_OFS = 0x%x };\n", hex($ofs);
}

3.6. Self modifying code

This is the platform independent part of Self modifying code[ABC].

Source: src/evil_magic/self_modify.c
#include <setjmp.h>
#include <signal.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "func.inc"

typedef void (*PfnVoid)(void);

#define MEMCPY_TEST(where) \
	memcpy(in_##where, in_code, sizeof(in_code)); \
	test(#where, (PfnVoid)in_##where)

static jmp_buf env;
static int received_sigill = 0;
static void on_sigill(int sig)
{
  printf(" on_sigill=%d ", sig);
  received_sigill = 1;
  longjmp(env, 1);
}

static void test(const char* name, PfnVoid code)
{
  printf("%8p is %s ... ", code, name);
  fflush(stdout);
  received_sigill = 0;
  if (0 == setjmp(env))
  {
    signal(SIGILL, on_sigill);
    code();
  }
  printf(" sigill=%d\n", received_sigill);
}

/* Buffer with static storage duration that receives a copy of in_code. */
static char in_data[sizeof(in_code)];

/*
 * Try to execute in_code in place, then copies of it in static storage,
 * on the heap and on the stack. Every attempt prints one line, see test().
 * Returns 0, or 1 if the heap buffer cannot be allocated.
 */
int main()
{
  char in_stack[sizeof(in_code)];
  char* in_heap = malloc(sizeof(in_code));

  if (in_heap == NULL)
  { /* without this check MEMCPY_TEST(heap) would copy to a null pointer */
    perror("malloc");
    return 1;
  }

  test("code", (PfnVoid)in_code);
  MEMCPY_TEST(data);
  MEMCPY_TEST(heap);
  MEMCPY_TEST(stack);

  free(in_heap);
  return 0;
}