### BEGIN LICENSE ###
### Use of the triage tools and related source code is subject to the terms
### of the license below.
###
### ------------------------------------------------------------------------
### Copyright (C) 2011 Carnegie Mellon University. All Rights Reserved.
### ------------------------------------------------------------------------
### Redistribution and use in source and binary forms, with or without
### modification, are permitted provided that the following conditions are
### met:
###
### 1. Redistributions of source code must retain the above copyright
###    notice, this list of conditions and the following acknowledgments
###    and disclaimers.
###
### 2. Redistributions in binary form must reproduce the above copyright
###    notice, this list of conditions and the following disclaimer in the
###    documentation and/or other materials provided with the distribution.
###
### 3. All advertising materials for third-party software mentioning
###    features or use of this software must display the following
###    disclaimer:
###
###    "Neither Carnegie Mellon University nor its Software Engineering
###     Institute have reviewed or endorsed this software"
###
### 4. The names "Department of Homeland Security," "Carnegie Mellon
###    University," "CERT" and/or "Software Engineering Institute" shall
###    not be used to endorse or promote products derived from this software
###    without prior written permission. For written permission, please
###    contact permission@sei.cmu.edu.
###
### 5. Products derived from this software may not be called "CERT" nor
###    may "CERT" appear in their names without prior written permission of
###    permission@sei.cmu.edu.
###
### 6. Redistributions of any form whatsoever must retain the following
###    acknowledgment:
###
###    "This product includes software developed by CERT with funding
###     and support from the Department of Homeland Security under
###     Contract No. FA 8721-05-C-0003."
###
### THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
### CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER
### EXPRESS OR IMPLIED, AS TO ANY MATTER, AND ALL SUCH WARRANTIES, INCLUDING
### WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
### EXPRESSLY DISCLAIMED. WITHOUT LIMITING THE GENERALITY OF THE FOREGOING,
### CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND
### RELATING TO EXCLUSIVITY, INFORMATIONAL CONTENT, ERROR-FREE OPERATION,
### RESULTS TO BE OBTAINED FROM USE, FREEDOM FROM PATENT, TRADEMARK AND
### COPYRIGHT INFRINGEMENT AND/OR FREEDOM FROM THEFT OF TRADE SECRETS.
### END LICENSE ###
'''
A collection of Python objects that wrap and extend the GDB Python API.

The objects in this file should generally not be instantiated directly--
they should be accessed via getTarget()

Note that for many objects defined in this file, GDB is queried only when the
object is instantiated: if the state of the Inferior changes, any previously
created instances should be considered stale.
'''
try:
    import gdb
except ImportError as e:
    raise ImportError("This script must be run in GDB: ", str(e))

import warnings
import re
import hashlib
from tools import *

def gdb_uint(gdb_value):
    '''
    Casts the supplied gdb.Value to GDB's 'unsigned long' type and returns
    the resulting gdb.Value.

    The GDB module (as of 7.3) evaluates expressions involving most
    registers (anything other than eip, esp, or ebp) to a signed type, so
    '$edx', for example, is considered negative when its highest order bit
    is set. Evaluated expressions are sometimes treated as addresses, such
    as when they are compared to si_addr (which is of an unsigned gdb.Value
    type). Passing values through this function makes the types agree so
    that arithmetic/boolean operations evaluate as expected.
    '''
    unsigned_long = gdb.lookup_type('unsigned long')
    return gdb_value.cast(unsigned_long)

class GdbWrapperError(RuntimeError):
    '''
    Common base class for the errors raised by this module. It derives from
    RuntimeError, so handlers written for RuntimeError also catch it.
    '''

class ProcMaps(list):
    '''
    A list of process address mappings. Each element is an AttrDict with
    the attributes start, end, size and offset (integers parsed from
    hexadecimal) and name (a string, empty when GDB prints no name).

    This object should only be instantiated when the Inferior is running,
    otherwise the string parsing may fail.
    '''
    def __init__(self):
        '''
        Runs GDB's "info proc map" command, parses the table it prints and
        appends one entry per mapping to self.

        Raises GdbWrapperError if the table header cannot be located in
        GDB's output.
        '''
        super(ProcMaps, self).__init__()

        mapstr = str(gdb.execute("info proc map", False, True))
        header_pos = mapstr.find("Start Addr")
        if header_pos == -1:
            raise GdbWrapperError("Unable to parse 'info proc map' string")
        table = mapstr[header_pos:].splitlines()

        # Newer GDB releases print a "Perms" column between the offset and
        # the object file name; skip it so it is not folded into the name
        name_col = 5 if "Perms" in table[0] else 4

        for row in table[1:]:
            fields = row.split()
            if len(fields) < 4:
                # blank or truncated row: there is no mapping to parse
                continue
            start, end, size, offset = [int(f, 16) for f in fields[0:4]]
            name = " ".join(fields[name_col:])
            self.append(AttrDict(start=start, end=end, size=size,
                                 offset=offset, name=name))

    def __str__(self):
        '''
        Returns a table with a header row followed by one row per mapping.
        '''
        rows = ["%10s %10s %10s %10s %s\n" %
                ("start", "end", "size", "offset", "name")]
        rows.extend("0x%08x 0x%08x 0x%08x 0x%08x %s\n" %
                    (m.start, m.end, m.size, m.offset, m.name)
                    for m in self)
        return "".join(rows)

    def findByName(self, name):
        '''
        Returns the first process address mapping whose name equals name,
        or None if no such mapping is found in self.
        '''
        for mapping in self:
            if mapping.name == name:
                return mapping
        return None

    def findByAddr(self, addr):
        '''
        Returns the first process address mapping that addr falls in
        (start inclusive, end exclusive), or None if no such mapping is
        found in self.
        '''
        for mapping in self:
            if mapping.start <= addr < mapping.end:
                return mapping
        return None

class Instruction():
    '''
    A disassembled instruction. Notable attributes include:

    gdbstr - the original string of disassembly that was used to
        populate this instance
    addr - the address of the instruction (an integer), parsed from the
        text that precedes the first ":" in gdbstr
    operands - a list of Operand instances corresponding to this
        Instruction's operands, ordered by operand_order
        See Operand class documentation for details
    mnemonic - the instruction mnemonic (ex: "pop" or "rep mov")
    dest - the destination Operand of this instruction (optional--
        only exists if this Instruction has a logical destination
        operand)
    source - the source Operand of this instruction (optional)
    aux - the aux Operand of this instruction (optional)
    '''
    operand_order = ["dest", "source", "aux"]
    x86_prefixes = ["rep", "repe", "repz", "repne", "repnz"]
    regexs = {'hex_int' : re.compile(r".*(0x\S*).*"),
              'dis_fail' : re.compile(".*<internal disassembler error>.*|"
                                      ".*No function contains specified address.*")}

    def __init__(self, gdbstr):
        '''
        Parses gdbstr (one line of GDB disassembly, such as the output of
        "x/i") to populate self.

        Raises GdbWrapperError if gdbstr reports a disassembler failure or
        does not have the "<address>: <instruction>" shape.
        '''
        self.gdbstr = gdbstr
        if re.match(self.regexs['dis_fail'], self.gdbstr):
            raise GdbWrapperError("Disassembler error detected: %s"
                                  % self.gdbstr)
        self.operands = []

        # split the address part from the instruction text at the first ":"
        parts = self.gdbstr.split(":", 1)
        addr_match = re.match(Instruction.regexs["hex_int"], parts[0])
        if len(parts) != 2 or addr_match is None:
            raise GdbWrapperError("Unable to parse disassembly: %s"
                                  % self.gdbstr)

        # a real list (not filter()) so it can be indexed on Python 3 too
        tokens = [tok for tok in parts[1].strip().split(" ") if tok]
        if not tokens:
            raise GdbWrapperError("Unable to parse disassembly: %s"
                                  % self.gdbstr)
        self.addr = int(addr_match.group(1), 16)

        # get mnemonic
        if tokens[0] in Instruction.x86_prefixes: # handle rep* prefixes
            self.mnemonic = " ".join(tokens[0:2])
            operand_str = " ".join(tokens[2:])
        else:
            self.mnemonic = tokens[0]
            operand_str = " ".join(tokens[1:])

        # handle "ret", "iret", "repz ret", et al.: no operands to parse
        if operand_str == "":
            return

        # get operands; zip() stops at the shorter sequence, so operands
        # beyond those named in operand_order stay reachable only through
        # self.operands
        self.operands = [Operand(e) for e in operand_str.split(",")]
        for attr_name, operand in zip(Instruction.operand_order,
                                      self.operands):
            setattr(self, attr_name, operand)

    def __str__(self):
        return self.gdbstr

class Operand():
    '''
    A disassembled instruction operand that can be evaluated. Notable
    attributes include:

    gdbstr - the original string of disassembled operand that was used to
        populate this instance
    is_pointer - True if this operand represents a pointer, False otherwise.
        For example, "DWORD PTR [eax]" is considered a pointer, "eax" is not.
    regs - a list of strings representing the registers used in this Operand
    expr - the operand rewritten as a GDB expression, with every register
        name prefixed by "$" (ex: "$eax+0x1234")
    '''
    _regexs = {'addr'   : re.compile(r".*\[(.*)\]|" # DWORD PTR [eax+0x1234]
                                     r".*\:(.*)"),  # DWORD PTR gs:0x18
               'p_regs' : re.compile("eiz|riz"),
               'regstrs' : re.compile(
                    r"^([a-z]{1,5}\d{0,2}[a-z]{0,1})(?=\s*[\*\+-])|"               # "(eax)+0x1234"
                    r"(?<=[\*\+-])\s*([a-z]{1,5}\d{0,2}[a-z]{0,1})(?=[\s*\*\+-])|" # "xmm5+(al)-0x1234"
                    r"(?<=[\*\+-])\s*([a-z]{1,5}\d{0,2}[a-z]{0,1})$|"              # "eax+4* (esi)"
                    r"^([a-z]{1,5}\d{0,2}[a-z]{0,1})$"),                           # "(edi)"
               'fpu_stack' : re.compile(r"(st\(.*\))")}

    @staticmethod
    def _first_nonempty(groups):
        '''
        Returns the first non-empty item of groups (the groups of a regex
        match); raises IndexError if every item is None or empty.
        '''
        # built as a list so it can be indexed on Python 2 and Python 3
        return [g for g in groups if g][0]

    def __init__(self, gdbstr):
        '''
        Parses gdbstr to populate self.  ex gdbstr="BYTE PTR es:[edi]"
        '''
        self.gdbstr = gdbstr
        gdbstr = gdbstr.split("<")[0] # get rid of "<addr resolutions>"

        # get addr
        addr = re.match(Operand._regexs['addr'], gdbstr) # ignores segment regs
        if addr:
            self.is_pointer = True
            expr = Operand._first_nonempty(addr.groups())
        else:
            self.is_pointer = False
            expr = gdbstr

        # eiz(x86) and riz(64) are pseudo-index registers that
        # the CPU always evaluates to 0. GDB types them as void (DNE)
        # which can cause a type error when evaluating expression via GDB,
        # so we substitute immediate zeros
        expr = re.sub(Operand._regexs['p_regs'], "0", expr)

        # exactly one of the pattern's groups is non-empty per match, so
        # joining the groups yields the register name
        self.regs = ["".join(groups) for groups in
                     re.findall(Operand._regexs['regstrs'], expr)]
        # prep for GDB evaluation. ex: "edx+0x12" becomes "$edx+0x12"
        self.expr = re.sub(
            Operand._regexs['regstrs'],
            (lambda mo: "$" + Operand._first_nonempty(mo.groups())),
            expr)

    def eval(self):
        '''
        Returns the integer value of this operand as evaluated by GDB. For
        example, if eax = 0x5 and this operand is DWORD PTR[eax + 0x100],
        eval would be 0x105 (a GDB.Value)

        Operands that use a register whose name contains "mm", or that
        start with an FPU stack reference such as "st(0)", are not
        evaluated by GDB: the placeholder integer 0xDEADBEEF is returned
        instead.
        '''
        if "mm" in " ".join(self.regs) or \
           re.match(self._regexs['fpu_stack'], self.gdbstr):
            return 0xDEADBEEF
        # Some GDBs (GDB 7.2 Fedora vs. Ubuntu/Debian) don't compare
        # signed and unsigned integer types consistently. gdb_uint
        # is a workaround
        return gdb_uint(gdb.parse_and_eval(self.expr))

    def __str__(self):
        return self.gdbstr

class Backtrace(list):
    '''
    A backtrace composed of a list of GDB Frames (ordered from innermost to
    outermost) and other attributes, including:

    hash - an object that can be used to determine uniqueness: note that
        Backtrace does not implement hashing from the Python data model (yet)
    abnormal_termination - set to True if GDB's backtrace unwind terminated
        abnormally, such as in the case of stack corruption
    '''
    def __init__(self, blacklist=None, major_depth=5):
        '''
        Uses the GDB API to populate self, starting from the currently
        selected frame. Any frames in blacklist are marked as such. The
        first non-blacklisted, major_depth frames are used to calculate the
        backtrace's major hash; every non-blacklisted frame contributes to
        the minor hash.
        '''
        super(Backtrace, self).__init__()
        self.blacklist = blacklist
        self.abnormal_termination = False

        # some versions of the GDB Python API do not expose a frame unwind
        # error sentinel; if it is not available a hardcoded value based on
        # an enum from GDB's gdb/frames.h is used
        unwind_error = getattr(gdb, "FRAME_UNWIND_FIRST_ERROR", 3)

        frame = gdb.selected_frame()
        hashed = 0      # number of non-blacklisted frames seen so far
        position = 0
        major = "0"
        minor = "0"
        while frame:
            frame = Frame(frame, position)

            # The check below is a workaround for a known libc/gdb runaway
            # backtrace issue, see
            # http://sourceware.org/ml/libc-alpha/2012-03/msg00573.html
            name = frame.name()
            if name and "libc_start_main" in name:
                break

            if not self._in_blacklist(frame):
                if hashed < major_depth:
                    major = self._chain_md5(major, frame)
                minor = self._chain_md5(minor, frame)
                hashed += 1
            else:
                frame.blacklisted = True
            self.append(frame)

            if frame.unwind_stop_reason() >= unwind_error:
                self.abnormal_termination = True
                break

            try:
                frame = frame.older()
            except RuntimeError:
                self.abnormal_termination = True
                break
            position += 1

        self.hash = AttrDict(major=major, minor=minor)

    @staticmethod
    def _chain_md5(previous, frame):
        '''
        Returns the hex MD5 digest of previous (a hex digest string)
        concatenated with str(frame).
        '''
        data = previous + str(frame)
        # hashlib only accepts bytes on Python 3; on Python 2 str already
        # is bytes and is passed through untouched
        if not isinstance(data, bytes):
            data = data.encode("utf-8")
        return hashlib.md5(data).hexdigest()

    def _in_blacklist(self, frame):
        '''
        Returns True if frame is in self.blacklist, False otherwise.

        A frame is considered blacklisted when its function name is listed
        in blacklist.functions, when its mapped region matches
        blacklist.map_regex, or when it has neither a name nor a mapped
        region. With no blacklist set, no frame is blacklisted.
        '''
        if not self.blacklist:
            return False
        if frame.name() in self.blacklist.functions:
            return True
        if frame.mapped_region and \
           re.match(self.blacklist.map_regex, frame.mapped_region):
            return True
        if not frame.name() and not frame.mapped_region:
            return True
        return False

    def __str__(self):
        '''
        Returns one line per frame, followed by GDB's description of the
        unwind stop reason when the unwind terminated abnormally.
        '''
        result = "\n".join([str(f) for f in self])
        if self.abnormal_termination:
            reason = self[-1].unwind_stop_reason()
            result += "\nabnormal stack unwind termination: %s" % \
                gdb.frame_stop_reason_string(reason)
        return result

class Frame():
    '''
    Wrapper for gdb.Frame. Adds frame position, pretty string, etc.

    Attributes set by this class:

    gdb_frame - the wrapped gdb.Frame
    position - the position passed to the constructor
    blacklisted - False on construction; may be set to True by the owner
    mapped_region - name of the process mapping that contains this frame's
        pc, or None if no mapping contains it

    Note: Because GDB Python API classes cannot be inherited, this class
    overloads __getattr__ to pass method calls to gdb.Frame. This may cause
    some issues with getting/setting this class's state, such as during
    pickling.
    '''
    def __init__(self, gdb_frame, position=None):
        '''
        Wraps gdb_frame and looks up the process mapping that contains its
        pc. The lookup re-queries GDB through getTarget().procmaps().
        '''
        self.gdb_frame = gdb_frame
        self.position = position
        self.blacklisted = False
        region = getTarget().procmaps().findByAddr(self.pc())
        if region:
            region = region.name
        self.mapped_region = region

    def __getattr__(self, name):
        '''
        Forwards lookups of attributes not found on self to the wrapped
        gdb.Frame.
        '''
        # __getattr__ only runs when normal lookup fails, so being asked for
        # gdb_frame means it was never set (bare, copied or unpickled
        # instance); forwarding that lookup would recurse without bound
        if name == "gdb_frame":
            raise AttributeError(name)
        return getattr(self.gdb_frame, name)

    def __str__(self):
        desc = "#%3d %s at 0x%x in %s" % \
            (self.position, self.name(), self.pc(), self.mapped_region)
        if self.blacklisted:
            desc += " (BL)"
        return desc

class Target():
    '''
    A wrapper for a Linux GDB Inferior. Includes various convenience
    methods used for classification.

    WARNING: Methods in this object may change the state of GDB. For
    example, the disassembly flavor may be left as "intel"
    after this code is executed.
    '''
    _regexs = {'info_frame' : re.compile(r"^ *eip = ([^\s;]*)(?: in )?" # addr
                                         r"([^\s;]*)" # fname
                                         r"([^\s;]*)") } # source_file:line

    # these functions and libs are not considered to be at fault for a crash
    blacklist = AttrDict(functions=('__kernel_vsyscall', 'abort', 'raise',
                                    'malloc', 'free', '*__GI_abort',
                                    '*__GI_raise', 'malloc_printerr',
                                    '__libc_message', '_int_malloc',
                                    '_int_free', ),
                         map_regex=re.compile(".*/libc.*|.*/libm.*"))

    # signal information cached by get_siginfo(); stored on the class, so
    # it is shared by every Target instance
    _si_signo = None
    _si_addr = None

    def __init__(self):
        '''
        Validates the state of the Inferior and switches GDB's disassembly
        flavor to "intel".

        Raises GdbWrapperError if the Inferior is not in a supported state
        (see check_inferior_state).
        '''
        self.check_inferior_state()
        gdb.execute("set disassembly-flavor intel", False, True)

    def check_inferior_state(self):
        '''
        Raises GdbWrapperError unless GDB has exactly one inferior, that
        inferior has at least one thread, and the first thread GDB lists
        for it is stopped.
        '''
        inferiors = gdb.inferiors()
        if len(inferiors) != 1:
            raise GdbWrapperError("Unsupported number of inferiors (%d)"
                                      % len(inferiors))
        threads = inferiors[0].threads()
        if len(threads) == 0:
            raise GdbWrapperError("No threads running")
        # is_stopped is a method of gdb.InferiorThread: it has to be called,
        # the bound method object itself is always truthy
        if not threads[0].is_stopped():
            raise GdbWrapperError("Inferior's primary thread is not stopped")

    def backtrace(self):
        '''
        Returns a new Backtrace built with this Target's blacklist.
        '''
        return Backtrace(self.blacklist)

    def hash(self):
        '''
        Returns the hash attribute of a freshly built backtrace.
        '''
        return self.backtrace().hash

    def procmaps(self):
        '''
        Returns a new ProcMaps instance (GDB is queried on every call).
        '''
        return ProcMaps()

    def faulting_frame(self):
        '''
        Returns the innermost frame of the backtrace that is not
        blacklisted. If every frame is blacklisted a warning is issued and
        None is returned.
        '''
        for frame in self.backtrace():
            if not frame.blacklisted:
                return frame
        warnings.warn("All frames blacklisted")
        return None

    def current_instruction(self):
        '''
        Returns an Instruction for the instruction at the program counter,
        or None if a RuntimeError (which includes GdbWrapperError) is
        raised while reading or parsing the disassembly.
        '''
        try:
            addr = self.pc()
            gdbstr = gdb.execute("x/i 0x%x" % addr,
                                 False, True).splitlines()[0]
            return Instruction(gdbstr)
        except RuntimeError:
            return None

    def pc(self):
        '''
        Returns the value of $pc as an unsigned gdb.Value.
        '''
        return gdb_uint(gdb.parse_and_eval("$pc"))

    def stack_pointer(self):
        '''
        Returns the value of $sp as an unsigned gdb.Value.
        '''
        return gdb_uint(gdb.parse_and_eval("$sp"))

    def pid(self):
        '''
        Returns the pid of the first inferior known to GDB.
        '''
        return gdb.inferiors()[0].pid

    def pointer_size(self):
        '''
        Returns the pointer size in bytes (an integer), derived from the
        bit width reported by platform.architecture().

        NOTE(review): platform.architecture() describes the Python
        interpreter's executable rather than the inferior -- confirm this
        is the intended source when the two can differ.
        '''
        import platform
        bits = platform.architecture()[0]
        # floor division keeps the result an int on Python 3 as well
        return int(re.match(r"^([\d]*)", bits).groups()[0]) // 8

    def si_signo(self):
        '''
        Returns $_siginfo's si_signo as a string, querying GDB only if no
        value has been cached yet.
        '''
        # This is a workaround to a bug in the GDB Python API:
        # The only reliable way to cause GDB to raise an exception when
        # $_siginfo is not available is to call __str__() -- otherwise
        # (such as when casting the Gdb.Value to another type), GDB may
        # force Python to abruptly exit rather than raising an exception
        if not Target._si_signo:
            self.get_siginfo()
        return Target._si_signo

    def si_addr(self):
        '''
        Returns $_siginfo's fault address (si_addr) as a string, querying
        GDB only if no value has been cached yet.
        '''
        if not Target._si_addr:
            self.get_siginfo()
        return Target._si_addr

    def get_siginfo(self):
        '''
        Evaluates $_siginfo via GDB and caches the string forms of si_signo
        and si_addr on the Target class.
        '''
        obj = gdb.parse_and_eval("$_siginfo")
        Target._si_signo = str(obj['si_signo'])
        Target._si_addr = str(obj['_sifields']['_sigfault']['si_addr'])


def getTarget():
    '''
    Builds and returns a Target: a Python wrapper representing the current
    state of the underlying Linux GDB Inferior object. A new Target is
    created on every call.
    '''
    target = Target()
    return target
