ANSSI-FR/polichombr

View on GitHub
polichombr/models/sample.py

Summary

Maintainability
D
2 days
Test Coverage
"""
    This file is part of Polichombr.

    (c) 2016 ANSSI-FR


    Description:
        Contains all the models for the samples,
        including their corresponding relations
"""

from marshmallow import fields

from polichombr import db, ma

from polichombr.models.models import CustomEnum, TLPLevel
from polichombr.models.analysis import AnalysisResultSchema


class SampleMetadata(db.Model):

    """
        Generic table used to store generic file metadata. Type must be
        defined in the SampleMetadataType enum class below. Value contains
        the metadata itself.

        Each row holds one (type, value) pair attached to a sample through
        the ``sample_id`` foreign key.
    """
    __tablename__ = 'samplemetadata'
    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Metadata kind, stored as an indexed integer (see SampleMetadataType).
    type_id = db.Column(db.Integer(), index=True)
    # The metadata itself, stored as a string.
    value = db.Column(db.String())
    # Owning sample: indexed foreign key to sample.id.
    sample_id = db.Column(db.Integer(), db.ForeignKey("sample.id"), index=True)


class SampleMetadataType(CustomEnum):

    """
        Keys identifying each kind of file metadata.

        Values are consecutive integers from 1 to 56, spelled out
        explicitly so that the number attached to each key is visible
        at a glance.
    """
    # PE_DOS_HEADER_* keys
    PE_DOS_HEADER_e_magic = 1
    PE_DOS_HEADER_e_cblp = 2
    PE_DOS_HEADER_e_cp = 3
    PE_DOS_HEADER_e_crlc = 4
    PE_DOS_HEADER_e_cparhdr = 5
    PE_DOS_HEADER_e_minalloc = 6
    PE_DOS_HEADER_e_maxalloc = 7
    PE_DOS_HEADER_e_ss = 8
    PE_DOS_HEADER_e_sp = 9
    PE_DOS_HEADER_e_csum = 10
    PE_DOS_HEADER_e_ip = 11
    PE_DOS_HEADER_e_cs = 12
    PE_DOS_HEADER_e_lfarlc = 13
    PE_DOS_HEADER_e_ovno = 14
    PE_DOS_HEADER_e_res = 15
    PE_DOS_HEADER_e_oemid = 16
    PE_DOS_HEADER_e_oeminfo = 17
    PE_DOS_HEADER_e_res2 = 18
    PE_DOS_HEADER_e_lfanew = 19

    # PE_FILE_HEADER_* keys
    PE_FILE_HEADER_Machine = 20
    PE_FILE_HEADER_NumberOfSections = 21
    PE_FILE_HEADER_TimeDateStamp = 22
    PE_FILE_HEADER_PointerToSymbolTable = 23
    PE_FILE_HEADER_NumberOfSymbols = 24
    PE_FILE_HEADER_SizeOfOptionalHeader = 25
    PE_FILE_HEADER_Characteristics = 26

    # PE_OPTIONAL_HEADER_* keys
    PE_OPTIONAL_HEADER_Magic = 27
    PE_OPTIONAL_HEADER_MajorLinkerVersion = 28
    PE_OPTIONAL_HEADER_MinorLinkerVersion = 29
    PE_OPTIONAL_HEADER_SizeOfCode = 30
    PE_OPTIONAL_HEADER_SizeOfInitializedData = 31
    PE_OPTIONAL_HEADER_SizeOfUninitializedData = 32
    PE_OPTIONAL_HEADER_AddressOfEntryPoint = 33
    PE_OPTIONAL_HEADER_BaseOfCode = 34
    PE_OPTIONAL_HEADER_ImageBase = 35
    PE_OPTIONAL_HEADER_SectionAlignment = 36
    PE_OPTIONAL_HEADER_FileAlignment = 37
    PE_OPTIONAL_HEADER_MajorOperatingSystemVersion = 38
    PE_OPTIONAL_HEADER_MinorOperatingSystemVersion = 39
    PE_OPTIONAL_HEADER_MajorImageVersion = 40
    PE_OPTIONAL_HEADER_MinorImageVersion = 41
    PE_OPTIONAL_HEADER_MajorSubsystemVersion = 42
    PE_OPTIONAL_HEADER_MinorSubsystemVersion = 43
    PE_OPTIONAL_HEADER_Reserved1 = 44
    PE_OPTIONAL_HEADER_SizeOfImage = 45
    PE_OPTIONAL_HEADER_SizeOfHeaders = 46
    PE_OPTIONAL_HEADER_CheckSum = 47
    PE_OPTIONAL_HEADER_Subsystem = 48
    PE_OPTIONAL_HEADER_DllCharacteristics = 49
    PE_OPTIONAL_HEADER_SizeOfStackReserve = 50
    PE_OPTIONAL_HEADER_SizeOfStackCommit = 51
    PE_OPTIONAL_HEADER_SizeOfHeapReserve = 52
    PE_OPTIONAL_HEADER_SizeOfHeapCommit = 53
    PE_OPTIONAL_HEADER_LoaderFlags = 54
    PE_OPTIONAL_HEADER_NumberOfRvaAndSizes = 55

    # Import hash key
    PE_import_hash = 56


class StringsItem(db.Model):

    """
    Strings contained in a binary file. Strings types are defined by the
    StringsType enum class.

    Each row stores one string together with its type and the sample it
    is attached to.
    """
    __tablename__ = 'stringsitem'
    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # String kind, stored as an indexed integer (see StringsType).
    string_type = db.Column(db.Integer(), index=True)
    # The string content itself.
    string_value = db.Column(db.String())
    # Owning sample: indexed foreign key to sample.id.
    sample_id = db.Column(db.Integer(), db.ForeignKey("sample.id"), index=True)


class StringsType(CustomEnum):

    """
    Kinds of strings that can be recorded for a sample.

    Values are consecutive integers starting at 1.
    """
    UNICODE = 1
    ASCII = 2
    # String built on the stack
    BUILDED = 3
    # String extracted after unpacking in IDAPro
    UNPACKED = 4


class FunctionInfo(db.Model):

    """
        Function information. Contains function's name, machoc hash and
        address. Used for quick function access. Machoc hash can be
        updated by tasks or by skelenox itself.
    """
    __tablename__ = 'functioninfo'
    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Function address, stored as an indexed big integer.
    address = db.Column(db.BigInteger(), index=True)
    # Function name (indexed).
    name = db.Column(db.String(), index=True)
    # Machoc hash of the function, stored as an indexed big integer.
    machoc_hash = db.Column(db.BigInteger(), index=True)
    # Owning sample: indexed foreign key to sample.id.
    sample_id = db.Column(db.Integer(), db.ForeignKey("sample.id"), index=True)


class SampleMatch(db.Model):

    """
        Match between samples. Used to spot samples similarities on
        analysis. Displayed to user.

        A row links two samples (``sid_1`` and ``sid_2``) and labels the
        link with a ``match_type`` string.
    """
    __tablename__ = 'samplematch'
    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # First sample of the pair: indexed foreign key to sample.id.
    sid_1 = db.Column(db.Integer, db.ForeignKey('sample.id'), index=True)
    # Second sample of the pair: indexed foreign key to sample.id.
    sid_2 = db.Column(db.Integer, db.ForeignKey('sample.id'), index=True)
    # Label describing the kind of match (indexed string).
    match_type = db.Column(db.String(), index=True)


class FileName(db.Model):

    """
        Sample's files names.

        One row per name; several rows may point to the same sample.
    """
    __tablename__ = 'filename'
    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # The file name.
    name = db.Column(db.String())
    # Owning sample: foreign key to sample.id (not indexed).
    sample_id = db.Column(db.Integer(), db.ForeignKey("sample.id"))


class AnalysisStatus(CustomEnum):

    """
        Analysis status of a sample (enum). Used for analysis scheduling
        and in samples views.

        Values are consecutive integers starting at 1.
    """
    FINISHED = 1
    RUNNING = 2
    TOSTART = 3


class CheckList(db.Model):

    """
        Checklist fields and description. This is a global information,
        set in the admin panel, links will just determine if checked
        or not.
    """
    __tablename__ = 'checklist'
    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Title of the checklist entry.
    title = db.Column(db.String())
    # Longer description of the checklist entry.
    description = db.Column(db.String())


# Association table linking checklist entries to samples
# (one row per checklist/sample pair).
sampletochecklist = db.Table(
    'sampletochecklist',
    db.Column('checklist_id', db.Integer, db.ForeignKey('checklist.id')),
    db.Column('sample_id', db.Integer, db.ForeignKey('sample.id')),
)

"""
    Matched Yara rules relationship.
"""
# Association table linking Yara rules to samples
# (one row per yararule/sample pair).
sampletoyara = db.Table(
    'sampletoyara',
    db.Column('yara_id', db.Integer, db.ForeignKey('yararule.id')),
    db.Column('sample_id', db.Integer, db.ForeignKey('sample.id')),
)

"""
    IDA actions relationship.
"""
# Association table linking IDA actions to samples; both columns are indexed.
sampletoactions = db.Table(
    'sampletoactions',
    db.Column(
        'sample_id', db.Integer, db.ForeignKey('sample.id'), index=True
    ),
    db.Column(
        'action_id', db.Integer, db.ForeignKey('idaactions.id'), index=True
    ),
)


class Sample(db.Model):

    """
    Samples model.

    Central table of this module: SampleMetadata, StringsItem,
    FunctionInfo, FileName and SampleMatch rows all carry a foreign key
    to ``sample.id``. The relationships declared here expose those rows
    (plus checklists, IDA actions, Yara rules and analysis results) as
    attributes of a sample.
    """
    __tablename__ = 'sample'
    id = db.Column(db.Integer, primary_key=True)
    # N-N relationships (through the association tables declared above)
    check_list = db.relationship('CheckList',
                                 secondary=sampletochecklist,
                                 backref=db.backref('samples', lazy='dynamic'))
    actions = db.relationship('IDAAction',
                              secondary=sampletoactions,
                              backref=db.backref('samples', lazy='dynamic'))
    yaras = db.relationship('YaraRule',
                            secondary=sampletoyara,
                            backref=db.backref('samples', lazy='dynamic'))
    # Enriched N-N relationships (double link): SampleMatch has two foreign
    # keys to this table, so each relationship names the one it follows.
    # Matches where this sample is referenced by SampleMatch.sid_1
    linked_samples = db.relationship('SampleMatch',
                                     backref=db.backref('sample1',
                                                        remote_side=[id]),
                                     foreign_keys=[SampleMatch.sid_1])
    # Matches where this sample is referenced by SampleMatch.sid_2
    linked_samples_2 = db.relationship('SampleMatch',
                                       backref=db.backref('sample2',
                                                          remote_side=[id]),
                                       foreign_keys=[SampleMatch.sid_2])
    # 1-N relationships; each child model gets a ``sample`` backref
    strings = db.relationship(
        "StringsItem", backref=db.backref(
            'sample', remote_side=[id]))
    s_metadata = db.relationship(
        "SampleMetadata", backref=db.backref(
            'sample', remote_side=[id]))
    # Loaded as a query (lazy="dynamic") rather than as a list
    functions = db.relationship(
        "FunctionInfo", backref=db.backref(
            'sample', remote_side=[id]), lazy="dynamic")
    filenames = db.relationship(
        "FileName", backref=db.backref(
            'sample', remote_side=[id]))
    analysis_data = db.relationship(
        'AnalysisResult', backref=db.backref(
            "sample", remote_side=[id]))
    # Sample's binary path
    storage_file = db.Column(db.String())
    # File size
    size = db.Column(db.Integer())
    # File's internal date (compilation timestamp, etc.)
    file_date = db.Column(db.DateTime(), index=True)
    # Hashes (hex digests: 32, 40 and 64 characters), all mandatory
    md5 = db.Column(db.String(32), index=True, nullable=False)
    sha1 = db.Column(db.String(40), index=True, nullable=False)
    sha256 = db.Column(db.String(64), index=True, nullable=False)
    # Mime type
    mime_type = db.Column(db.String())
    full_mime_type = db.Column(db.String())
    # Abstract, set by user
    abstract = db.Column(db.String())
    # Import hash, set by tasks
    import_hash = db.Column(db.String(), index=True)
    # TLP level, mandatory (defaults to TLPLevel.TLPAMBER)
    TLP_sensibility = db.Column(
        db.Integer(),
        nullable=False,
        default=TLPLevel.TLPAMBER)
    # Analysis status, mandatory (defaults to AnalysisStatus.TOSTART)
    analysis_status = db.Column(
        db.Integer(),
        nullable=False,
        default=AnalysisStatus.TOSTART)
    # Sample's analysis date
    analysis_date = db.Column(db.DateTime())
    # "status" is not used, for now
    # status = db.Column(db.Integer())

    def __repr__(self):
        """
            Return a short debug representation, e.g. ``Sample 12``.

            ``%s`` is used instead of ``%d`` so that an instance whose
            ``id`` is still None (no primary key assigned yet) yields
            ``Sample None`` instead of raising TypeError. The output for
            integer ids is unchanged.
        """
        return 'Sample %s' % self.id


class FunctionInfoSchema(ma.ModelSchema):

    """
        Marshmallow schema for the FunctionInfo model.
    """
    class Meta:

        """
            Whitelist of the FunctionInfo attributes handled by the schema.
        """
        fields = (
            'id',
            'address',
            'name',
            'sample_id',
            'machoc_hash',
        )


class SampleMatchSchema(ma.ModelSchema):

    """
    Marshmallow schema for the SampleMatch model.
    """
    class Meta:

        """
        Whitelist of the SampleMatch attributes handled by the schema.
        """
        fields = (
            'id',
            'sid_1',
            'sid_2',
            'match_type',
        )


class SampleSchema(ma.ModelSchema):

    """
    Marshmallow schema for the Sample model.

    Related objects are declared as nested schemas restricted to a few
    attributes each.
    """
    # NOTE(review): 'families', 'users' and 'analysis_data' are declared
    # here but are not listed in Meta.fields below -- confirm whether
    # leaving them out of the whitelist is intended.
    families = fields.Nested(
        'FamilySchema',
        only=['id', 'name'])
    users = fields.Nested(
        'UserSchema',
        only=['id', 'nickname'])
    analysis_data = fields.Nested(
        AnalysisResultSchema,
        many=True,
        only=['id', 'type'])
    linked_samples = fields.Nested(
        SampleMatchSchema,
        many=True,
        only=['sid_2', 'match_type'])

    class Meta:

        """
            Whitelist of the attributes handled by the schema
            (see the flask-marshmallow documentation).
        """
        fields = (
            'id',
            'md5',
            'sha1',
            'sha256',
            'size',
            'mime_type',
            'full_mime_type',
            'analysis_status',
            'analysis_date',
            'file_date',
            'TLP_sensibility',
            'linked_samples',
            'abstract',
        )