SpamExperts/OrangeAssassin

View on GitHub
oa/received_parser.py

Summary

Maintainability
F
3 days
Test Coverage
"""
Parser for Received headers
It extracts the following metadata:
:rdns
:ip
:helo
:by
:ident
:id
:auth
"""

import re
from oa.regex import Regex

# Matches a reference to the local host: the name "localhost" (optionally
# followed by ".localdomain"), the IPv4 loopback 127.0.0.1, that same address
# written as IPv4-mapped IPv6 (...:ffff:127.0.0.1), or the IPv6 loopback ::1
# in any of its zero-compressed spellings, optionally behind an "IPv6:" prefix.
# Compiled verbose and case-insensitive; every group is non-capturing, and the
# raw ``.pattern`` is interpolated into LOCALHOST_RE further down.
LOCALHOST = Regex(r"""
(?:
              # as a string
              localhost(?:\.localdomain)?
            |
              \b(?<!:)    # ensure no "::" IPv6 marker before this one
              # plain IPv4
              127\.0\.0\.1 \b
            |
              # IPv6 addresses
              # don't use \b here, it hits on :'s
              (?:IPv6:    # with optional prefix
                        | (?<![a-f0-9:])
                      )
              (?:
            # IPv4 mapped in IPv6
            # note the colon after the 12th byte in each here
            (?:
              # first 6 (12 bytes) non-zero
              (?:0{1,4}:){5}        ffff:
              |
              # leading zeros omitted (note {0,5} not {1,5})
              ::(?:0{1,4}:){0,4}        ffff:
              |
              # trailing zeros (in the first 6) omitted
              (?:0{1,4}:){1,4}:        ffff:
              |
              # 0000 in second up to (including) fifth omitted
              0{1,4}::(?:0{1,4}:){1,3}    ffff:
              |
              # 0000 in third up to (including) fifth omitted
              (?:0{1,4}:){2}:0{1,2}:    ffff:
              |
              # 0000 in fourth up to (including) fifth omitted
              (?:0{1,4}:){3}:0:        ffff:
              |
              # 0000 in fifth omitted
              (?:0{1,4}:){4}:        ffff:
            )
            # and the IPv4 address appended to all of the 12 bytes above
            127\.0\.0\.1    # no \b, we check later

            | # or (separately) a pure IPv6 address

            # all 8 (16 bytes) of them present
            (?:0{1,4}:){7}            0{0,3}1
            |
            # leading zeros omitted
            :(?::0{1,4}){0,6}:        0{0,3}1
            |
            # 0000 in second up to (including) seventh omitted
            0{1,4}:(?::0{1,4}){0,5}:    0{0,3}1
            |
            # 0000 in third up to (including) seventh omitted
            (?:0{1,4}:){2}(?::0{1,4}){0,4}:    0{0,3}1
            |
            # 0000 in fouth up to (including) seventh omitted
            (?:0{1,4}:){3}(?::0{1,4}){0,3}:    0{0,3}1
            |
            # 0000 in fifth up to (including) seventh omitted
            (?:0{1,4}:){4}(?::0{1,4}){0,2}:    0{0,3}1
            |
            # 0000 in sixth up to (including) seventh omitted
            (?:0{1,4}:){5}(?::0{1,4}){0,1}:    0{0,3}1
            |
            # 0000 in seventh omitted
            (?:0{1,4}:){6}:            0{0,3}1
              )
              (?![a-f0-9:])
            )
""", re.I | re.X)

# Matches an address (anchored at the start of the string) that belongs to a
# private, host-local, link-local or carrier-grade-NAT range, either as plain
# IPv4, as IPv4-mapped IPv6, as an fe80::/10 link-local IPv6 address, or as the
# IPv6 loopback ::1.  Compiled verbose and case-insensitive.
# The plain-IPv4 list and the IPv4-mapped list below must stay in sync: both
# cover 10/8, 127/8, 169.254/16, 172.16/12, 192.168/16 and 100.64/10.
IP_PRIVATE = Regex(r"""
^(?:
  (?:   # IPv4 addresses
    10|                    # 10.0.0.0/8      Private Use (5735, 1918)
    127|                            # 127.0.0.0/8     Host-local  (5735, 1122)
    169\.254|                # 169.254.0.0/16  Link-local  (5735, 3927)
    172\.(?:1[6-9]|2[0-9]|3[01])|    # 172.16.0.0/12   Private Use (5735, 1918)
    192\.168|                 # 192.168.0.0/16  Private Use (5735, 1918)
    100\.(?:6[4-9]|[7-9][0-9]|1[01][0-9]|12[0-7])  # 100.64.0.0/10 CGN (6598)
    )\..*
|
  (?:   # IPv6 addresses
    # don't use \b here, it hits on :'s
    (?:IPv6:    # with optional prefix
      | (?<![a-f0-9:])
    )
    (?:
      # IPv4 mapped in IPv6
      # note the colon after the 12th byte in each here
      (?:
        # first 6 (12 bytes) non-zero
        (?:0{1,4}:){5}        ffff:
        |
        # leading zeros omitted (note {0,5} not {1,5})
        ::(?:0{1,4}:){0,4}        ffff:
        |
        # trailing zeros (in the first 6) omitted
        (?:0{1,4}:){1,4}:        ffff:
        |
        # 0000 in second up to (including) fifth omitted
        0{1,4}::(?:0{1,4}:){1,3}    ffff:
        |
        # 0000 in third up to (including) fifth omitted
        (?:0{1,4}:){2}:0{1,2}:    ffff:
        |
        # 0000 in fourth up to (including) fifth omitted
        (?:0{1,4}:){3}:0:        ffff:
        |
        # 0000 in fifth omitted
        (?:0{1,4}:){4}:        ffff:
      )
      # and the IPv4 address appended to all of the 12 bytes above
      (?:
        10|
        127|
        169\.254|
        172\.(?:1[6-9]|2[0-9]|3[01])|
        192\.168|
        100\.(?:6[4-9]|[7-9][0-9]|1[01][0-9]|12[0-7])
      )\..*

    | # or IPv6 link-local address space, fe80::/10
      fe[89ab][0-9a-f]:.*

    | # or the host-local ::1 addr, as a pure IPv6 address

      # all 8 (16 bytes) of them present
      (?:0{1,4}:){7}            0{0,3}1
      |
      # leading zeros omitted
      :(?::0{1,4}){0,6}:        0{0,3}1
      |
      # 0000 in second up to (including) seventh omitted
      0{1,4}:(?::0{1,4}){0,5}:    0{0,3}1
      |
      # 0000 in third up to (including) seventh omitted
      (?:0{1,4}:){2}(?::0{1,4}){0,4}:    0{0,3}1
      |
      # 0000 in fouth up to (including) seventh omitted
      (?:0{1,4}:){3}(?::0{1,4}){0,3}:    0{0,3}1
      |
      # 0000 in fifth up to (including) seventh omitted
      (?:0{1,4}:){4}(?::0{1,4}){0,2}:    0{0,3}1
      |
      # 0000 in sixth up to (including) seventh omitted
      (?:0{1,4}:){5}(?::0{1,4}){0,1}:    0{0,3}1
      |
      # 0000 in seventh omitted
      (?:0{1,4}:){6}:            0{0,3}1
    )
    (?![a-f0-9:])
  )
)
""", re.X | re.I)

# Matches a single IP address: dotted-quad IPv4, IPv6 ending in an embedded
# dotted-quad, or pure IPv6 in full or "::"-compressed form (optionally behind
# an "IPv6:" prefix).  Every group is non-capturing, so the raw ``.pattern``
# can be wrapped in a capture group by the header regexes defined below.
# NOTE(review): compiled with re.X only (no re.I), unlike LOCALHOST and
# IP_PRIVATE, so upper-case hex digits do not match - confirm this is intended.
# NOTE(review): the inline remark on the first alternative says "IPv4 marker";
# the look-behind it annotates rejects a preceding ":", i.e. an IPv6 marker.
IP_ADDRESS = Regex(r"""
            (?:
              \b(?<!:)    # ensure no "::" IPv4 marker before this one
              # plain IPv4, as above
              (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
              (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
              (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
              (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\b
            |
              # IPv6 addresses
              # don't use \b here, it hits on :'s
              (?:IPv6:    # with optional prefix
                        | (?<![a-f0-9:])
                      )
              (?:
            # IPv4 mapped in IPv6
            # note the colon after the 12th byte in each here
            (?:
              # first 6 (12 bytes) non-zero
              (?:[a-f0-9]{1,4}:){6}
              |
              # leading zeros omitted (note {0,5} not {1,5})
              ::(?:[a-f0-9]{1,4}:){0,5}
              |
              # trailing zeros (in the first 6) omitted
              (?:[a-f0-9]{1,4}:){1,5}:
              |
              # 0000 in second up to (including) fifth omitted
              [a-f0-9]{1,4}::(?:[a-f0-9]{1,4}:){1,4}
              |
              # 0000 in third up to (including) fifth omitted
              (?:[a-f0-9]{1,4}:){2}:(?:[a-f0-9]{1,4}:){1,3}
              |
              # 0000 in fourth up to (including) fifth omitted
              (?:[a-f0-9]{1,4}:){3}:(?:[a-f0-9]{1,4}:){1,2}
              |
              # 0000 in fifth omitted
              (?:[a-f0-9]{1,4}:){4}:[a-f0-9]{1,4}:
            )
            # and the IPv4 address appended to all of the 12 bytes above
            (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
            (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
            (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
            (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)   # no \b, we check later

            | # or (separately) a pure IPv6 address

            # all 8 (16 bytes) of them present
            (?:[a-f0-9]{1,4}:){7}[a-f0-9]{1,4}
            |
            # leading zeros omitted
            :(?::[a-f0-9]{1,4}){1,7}
            |
            # trailing zeros omitted
            (?:[a-f0-9]{1,4}:){1,7}:
            |
            # 0000 in second up to (including) seventh omitted
            [a-f0-9]{1,4}:(?::[a-f0-9]{1,4}){1,6}
            |
            # 0000 in third up to (including) seventh omitted
            (?:[a-f0-9]{1,4}:){2}(?::[a-f0-9]{1,4}){1,5}
            |
            # 0000 in fouth up to (including) seventh omitted
            (?:[a-f0-9]{1,4}:){3}(?::[a-f0-9]{1,4}){1,4}
            |
            # 0000 in fifth up to (including) seventh omitted
            (?:[a-f0-9]{1,4}:){4}(?::[a-f0-9]{1,4}){1,3}
            |
            # 0000 in sixth up to (including) seventh omitted
            (?:[a-f0-9]{1,4}:){5}(?::[a-f0-9]{1,4}){1,2}
            |
            # 0000 in seventh omitted
            (?:[a-f0-9]{1,4}:){6}:[a-f0-9]{1,4}
            |
            # :: (the unspecified address 0:0:0:0:0:0:0:0)
            # dos: I don't expect to see this address in a header, and
            # it may cause non-address strings to match, but we'll
            # include it for now since it is valid
            ::
              )
              (?![a-f0-9:])
            )""", re.X)

# Loose matcher for an IP-looking token: 7 or more hex digits, dots or colons,
# delimited on the left by "[", " " or "(" and on the right by "]", " ",
# a newline, ";" or ")".
IPFRE = Regex(r"[\[ \(]{1}[a-fA-F\d\.\:]{7,}?[\] \n;\)]{1}")

# Fetchmail hand-over ("... <host> [<ip>] by <host> with <proto> (fetchmail").
# The bracketed IP part is optional.  Used by ReceivedParser.check_for_skip.
FETCHMAIL = Regex(r"""
.*?\s(\S+)\s(?:\[({IP_ADDRESS})\]\s)?
by\s(\S+)\swith
\s\S+\s\(fetchmail""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)

# "<host> (<user>@<localhost>) by <host> (" - a hand-over from the local host
# itself.  Used by ReceivedParser.check_for_skip.
LOCALHOST_RE = Regex(r"""
^\S+\s\([^\s\@]+\@{LOCALHOST}\)\sby\s\S+\s\(
""".format(LOCALHOST=LOCALHOST.pattern), re.X | re.I)

# "<helo> (unknown [<ip>]) (" - the literal word "unknown" is captured as
# group 2.  Used by ReceivedParser.get_rdns for headers containing "(Postfix)".
UNKNOWN_RE_RDNS = Regex(r"""
^(\S+)\s\((unknown)\s\[({IP_ADDRESS})\]\)\s\(
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)

# ================ check_for_skip regex ==================
# Each pattern below identifies one kind of Received header that
# ReceivedParser.check_for_skip ignores.  They are applied with ``search`` to
# the header text after the leading "from " has been removed; sample headers
# for each case are quoted in check_for_skip itself.

# Local delivery ("with local" / "with local-<something>").
WITH_LOCAL_RE = Regex(r'\bwith local(?:-\S+)? ', re.I)
# BSMTP hand-over.
BSMTP_RE = Regex(r'^\S+ by \S+ with BSMTP', re.I)
# Content Technologies gateway.
CONTENT_TECH_RE = Regex(r"""
^\S+\s\(\S+\)\sby\s\S+\s\(Content\sTechnologies\s""", re.X | re.I)
# AVG SMTP scanner on 127.0.0.1.
AVG_SMTP_RE = Regex(r'^127\.0\.0\.1 \(AVG SMTP \S+ \[\S+\]\)')
# "<addr@host> by <host> by uid <n>" (qmail-scanner style).
QMAIL_RE = Regex(r'^\S+\@\S+ by \S+ by uid \S+ ')
# "<addr@host> by <host>" - the "from" part is an e-mail address.
FROM_RE = Regex(r'^\S+\@\S+ by \S+ ')
# "Unknown/Local (".
UNKNOWN_RE = Regex(r'^Unknown\/Local \(')
# "(AUTH: <token>) by <host> with".
AUTH_SKIP_RE = Regex(r'^\(AUTH: \S+\) by \S+ with ')
# "localhost (localhost [[UNIX: localhost]]) by".
LOCAL_SKIP_RE = Regex(r"""
^localhost\s\(localhost\s\[\[UNIX:\slocalhost\]\]\)\sby\s""", re.X)
# Internal amazon.com traffic with a "peer crosscheck" note.
AMAZON_RE = Regex(r"""
^\S+\.amazon\.com\sby
\s\S+\.amazon\.com\swith\sESMTP\s\(peer\scrosscheck:\s""", re.X)
# Novell GroupWise hand-over from a dot-less host name.
NOVELL_RE = Regex(r'^[^\.]+ by \S+ with Novell_GroupWise')
# "no.name.available by <host> via smtpd (for".
NO_NAME_RE = Regex(r'^no\.name\.available by \S+ via smtpd \(for ')
# Microsoft SMTPSVC mail pickup service (must match to the end of the text).
SMTPSVC_RE = Regex(r"""
^mail\spickup\sservice\sby\s(\S+)\swith\sMicrosoft\sSMTPSVC$""", re.X)

# ========================================================

# ================ function regex ====================
# Patterns used by the ReceivedParser.get_* extractors.  Those that embed
# {IP_ADDRESS} interpolate the raw IP_ADDRESS pattern, which contains no
# capturing groups, so the group numbers seen by the extractors are exactly
# the parentheses written out here.

# Envelope sender announced as "return-path", "envelope-sender" or
# "envelope-from" (get_envfrom).
ENVFROM_RE = Regex(r"""
.*?(?:return-path:?\s|envelope-(?:sender|from)[\s=])(\S+)\b""", re.X)
# First whitespace-delimited token of the header (get_rdns fallback).
RDNS_RE = Regex(r'^(\S+) ')
# Leading token immediately followed by "(" (get_rdns).
RDNS_RE2 = Regex(r'^(\S+)\(')
# Header that starts directly with an IP, optionally wrapped in "(" / "[".
HEADER_RE = Regex(r"""
    ^\(?\[?({IP_ADDRESS})\]?\)?\sby
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
# A value that is just a bracketed IP; get_rdns uses it to reject such values.
RDNS_IP_RE = Regex(r"""
    ^\[({IP_ADDRESS})\]
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
# "<name> (<ip>) by ... (<ip>) with ... ESMTP|SMTP".
RDNS_SMTP = Regex(r"""
    ^(\S+)\s\(\s?{IP_ADDRESS}\)\sby.*\({IP_ADDRESS}\)\swith.*(ESMTP|SMTP)
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
# "<name> ([<ip>]) by ... <host> with ... smtp/esmtp".
RDNS_SMTP1 = Regex(r"""
    ^(\S+)\s\(\s?\[{IP_ADDRESS}\]\)\sby.*(\S+)\swith.*(esmtp|smtp|ESMTP|SMTP)
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
# "<name> ([<ip>[:port]]) ... by <host>"; group 2 is the leading name,
# group 3 the "by" host.
RDNS_RE4 = Regex(r"""
    ^((\S+)\s\(\[{IP_ADDRESS})(?:[.:]\d+)?\]\).*?\sby\s(\S+)
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
# "([<ip>]) by <host>".
RDNS_RE1 = Regex(r"""
    ^\(\[({IP_ADDRESS})\]\)\sby\s(\S+)
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
# "<name> [<ip>] by <host> [<ip>]"; group 4 is the second IP, which get_by
# returns as the "by" value.
RDNS_RE3 = Regex(r"""
    ^(\S+)\s\[({IP_ADDRESS})\]\sby\s(\S+)\s\[({IP_ADDRESS})]
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
# First token following " by " (get_by).
BY_RE = Regex(r'.*? by (\S+) .*')
# The HELO_* patterns are alternative layouts tried by get_helo (several are
# reused by get_rdns).
HELO_RE = Regex(r'.*?\((?:HELO|EHLO) (\S*)\)', re.I)
HELO_RE10 = Regex(r'.*\(.* (HELO|EHLO) (\S+)\)', re.I)
HELO_RE2 = Regex(r"""
    .*?\((\S+)\s\[{IP_ADDRESS}\]\)
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
HELO_RE3 = Regex(r'.*?helo=(\S+)\)', re.I)
HELO_RE4 = Regex(r"""
    ^\(?(\S+)\s\(?\s?\[{IP_ADDRESS}\]\)
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
HELO_RE5 = Regex(r"""
    ^(\S+)\s\(\s?{IP_ADDRESS}\)
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
HELO_RE6 = Regex(r"""
    ^(\S+)\s\(\[{IP_ADDRESS}\]\s\[{IP_ADDRESS}\]\)
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
# "<helo> (<rdns> [<ip>] ...)": group 1 is used as helo, group 2 as rdns.
HELO_RE7 = Regex(r"""
    ^(\S+)\s\((\S+)\s?\[{IP_ADDRESS}\].*\)
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
HELO_RE8 = Regex(r"""
    ^\(?(\S+)\s\[\s?{IP_ADDRESS}\]
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
HELO_RE9 = Regex(r"""
    ^\(?(\S+)\s\(?\s?\[{IP_ADDRESS}\] \s?(\S+)\)
""".format(IP_ADDRESS=IP_ADDRESS.pattern), re.X)
# Ident given as "ident=<value>)" or as "(<value>@" (get_ident).
IDENT_RE = Regex(r'.*ident=(\S+)\)')
IDENT_RE2 = Regex(r'.*\((\S+)@')
# Token following the last "id " in the header (get_id).
ID_RE = Regex(r'.*id (\S+)')
# The AUTH_* patterns are used by get_auth.
# Protocol keyword after " with " that signals an authenticated transfer.
AUTH_RE = Regex(r"""
.*?\swith\s((?:ES|L|UTF8S|UTF8L)MTPS?A|ASMTP|HTTPU?)(?:\s|;|$)""", re.X | re.I)
# "(version=<v> cipher=<c>)" TLS details.
AUTH_VC_RE = Regex(r'.*? \(version=([^ ]+) cipher=([^\)]+)\)')
# "(authenticated as <user>)".
AUTH_RE2 = Regex(r'.*? \(authenticated as (\S+)\)')
# Postfix "(Authenticated sender: <user>)".
AUTH_RE3 = Regex(r"""
\)\s\(Authenticated\ssender:\s\S+\)\sby\s\S+\s\(Postfix\)\swith\s""", re.X)
# mail.gmx.net / mail.gmx.com; group 1 is the host, group 4 the protocol.
AUTH_RE4 = Regex(r'.* by (mail\.gmx\.(net|com)) \([^\)]+\) with ((ESMTP|SMTP))')
# CommuniGate Pro account hand-over.
AUTH_RE5 = Regex(r'.* \(account .* by .* \(CommuniGate Pro ('
                      r'HTTP|SMTP)')

# str.format template (not a compiled Regex) with one positional placeholder.
# presumably filled with an IP pattern by callers outside this section - verify
ORIGINATING_IP_HEADER_RE = r"^X-ORIGINATING-IP: ({}).*"

# ========================================================


class ReceivedParser(object):
    """Extract relay metadata from a message's Received headers.

    The constructor takes the raw header values, keeps those that start with
    ``from`` (whitespace-normalised, without the leading ``from `` and
    without anything after the first ``;``) or with ``X-ORIGINATING-IP``
    (verbatim) in ``received_headers``, and then fills ``received`` with one
    dict per header that is not skipped.  Every dict has the keys ``rdns``,
    ``ip``, ``by``, ``helo``, ``ident``, ``id``, ``envfrom`` and ``auth``;
    a value that could not be determined is an empty string.
    """

    def __init__(self, received_headers):
        """Normalise the given headers and parse them immediately.

        :param received_headers: iterable of header strings
        """
        self.received_headers = list()
        self.received = list()
        for header in received_headers:
            if header.startswith('from'):
                header = re.sub(r'\s+', ' ', header)  # removing '\n\t' chars
                header = header.replace('from ', '', 1)
                header = header.split(';')[0]  # drop the trailing date part
                self.received_headers.append(header)
            elif header.startswith("X-ORIGINATING-IP"):
                self.received_headers.append(header)

        self._parse_message()

    @staticmethod
    def check_for_skip(header):
        """Tell whether a header describes a hand-over that must be ignored.

        Ignored are local deliveries, fetchmail and BSMTP hand-overs, a
        number of product-specific internal hops and headers whose "from"
        part carries no usable host information; one sample header is quoted
        next to each check below.

        :param header: The received header without the 'from ' at the begin
        :return: True if the header has to be skipped, False otherwise
        """
        # Received: from root by server6.seinternal.com with
        # local-spamexperts-generated (Exim 4.80) id 1abp1W-0007Xm-KO for
        # spam@spamexperts.wiredtree.com
        if WITH_LOCAL_RE.search(header):
            return True
        # Received: from cabbage.jmason.org [127.0.0.1]
        # by localhost with IMAP (fetchmail-5.9.0)
        # for jm@localhost (single-drop); Thu, 13 Mar 2003 20:39:56 -0800 (PST)
        if 'fetchmail' in header and FETCHMAIL.search(header):
            return True
        # Received: from faerber.muc.de by slarti.muc.de with BSMTP (rsmtp-qm)
        # for asrg@ietf.org; 7 Mar 2003 21:10:38 -0000
        if ' with BSMTP' in header and BSMTP_RE.search(header):
            return True
        # Received: from scv3.apple.com (scv3.apple.com) by mailgate2.apple.com
        # (Content Technologies SMTPRS 4.2.1) with ESMTP id <T61095998e1118164e
        # 13f8@mailgate2.apple.com>; Mon, 17 Mar 2003 17:04:54 -0800
        if CONTENT_TECH_RE.search(header):
            return True
        # Received: from raptor.research.att.com (bala@localhost) by
        # raptor.research.att.com (SGI-8.9.3/8.8.7) with ESMTP id KAA14788
        # for <asrg@example.com>; Fri, 7 Mar 2003 10:37:56 -0500 (EST)
        # make this localhost-specific, so we know it's safe to ignore
        if LOCALHOST_RE.search(header):
            return True
        # from 127.0.0.1 (AVG SMTP 7.0.299 [265.6.8]);
        # Wed, 05 Jan 2005 15:06:48 -0800
        if AVG_SMTP_RE.search(header):
            return True
        # from qmail-scanner-general-admin@lists.sourceforge.net by alpha by
        # uid 7791 with qmail-scanner-1.14 (spamassassin: 2.41.
        # Clear:SA:0(-4.1/5.0):. Processed in 0.209512 secs)
        if QMAIL_RE.search(header):
            return True
        # from DSmith1204@aol.com by imo-m09.mx.aol.com (mail_out_v34.13.)
        # id 7.53.208064a0 (4394); Sat, 11 Jan 2003 23:24:31 -0500 (EST)
        if FROM_RE.search(header):
            return True
        # from Unknown/Local ([?.?.?.?]) by mailcity.com; Fri, 17
        # Jan 2003 15:23:29 -0000
        if UNKNOWN_RE.search(header):
            return True
        # from (AUTH: e40a9cea) by vqx.net with esmtp (courier-0.40)
        # for <asrg@ietf.org>; Mon, 03 Mar 2003 14:49:28 +0000
        if AUTH_SKIP_RE.search(header):
            return True
        # from localhost (localhost [[UNIX: localhost]])
        # by home.barryodonovan.com
        # (8.12.11/8.12.11/Submit) id iBADHRP6011034; Fri, 10 Dec 2004 13:17:27
        if LOCAL_SKIP_RE.search(header):
            return True
        # Internal Amazon traffic
        # from dc-mail-3102.iad3.amazon.com by mail-store-2001.amazon.com with
        # ESMTP (peer crosscheck: dc-mail-3102.iad3.amazon.com)
        if AMAZON_RE.search(header):
            return True
        # from GWGC6-MTA by gc6.jefferson.co.us with Novell_GroupWise;
        #  Tue, 30 Nov 2004 10:09:15 -0700
        if NOVELL_RE.search(header):
            return True
        # Received: from no.name.available by [165.224.216.88] via smtpd
        # (for lists.sourceforge.net [66.35.250.206]) with ESMTP
        # These are from an internal host protected by a Raptor firewall,
        # to hosts outside the firewall.  We can only ignore the handover
        # since we don't have enough info in those headers; however, from
        # googling, it appears that all samples are cases where the handover is
        # safely ignored.
        if NO_NAME_RE.search(header):
            return True
        # from mail pickup service by www.fmwebsite.com with Microsoft SMTPSVC;
        # Tue, 12 Jan 2016 17:51:31 -0500
        if SMTPSVC_RE.search(header):
            return True
        return False

    @staticmethod
    def get_envfrom(header):
        """Parsing envelope-from or envelope-sender from Received header

        :param header: The received header without the 'from ' at the begin
        :return: envfrom if is found if not it returns an empty string
        """
        match = ENVFROM_RE.match(header)
        if not match:
            return ""
        envfrom = match.group(1).strip("><[]")
        # Keep only what follows the last "=" when the token contains one.
        if '=' in envfrom:
            envfrom = envfrom.rsplit("=", 1)[1]
        return envfrom

    @staticmethod
    def _guess_rdns(header):
        """Return the raw rdns candidate for *header*, before clean-up.

        The header layouts are tried in a fixed order and the first one that
        applies decides the result; an empty string means that layout carries
        no reverse-DNS name.
        """
        match = HELO_RE7.match(header)
        if match:
            candidate = match.group(2)
            return "" if candidate == "softdnserr" else candidate
        match = HELO_RE5.match(header)
        if match:
            if "(Scalix SMTP Relay" in header:
                return ""
            return match.group(1)
        # Several layouts are only trusted when the header is not from Exim.
        not_exim = "Exim" not in header
        if not_exim and HELO_RE4.match(header):
            match = RDNS_SMTP.match(header)
            return match.group(1) if match else ""
        if HEADER_RE.match(header):
            return ""
        match = RDNS_RE2.match(header)
        if match:
            return match.group(1)
        if not_exim and RDNS_SMTP1.match(header):
            return ""
        if RDNS_RE1.match(header) or RDNS_RE3.match(header):
            return ""
        if not_exim and RDNS_RE4.match(header):
            return ""
        # Fallback: the first whitespace-delimited token of the header.
        match = RDNS_RE.match(header)
        return match.group(1) if match else ""

    @staticmethod
    def get_rdns(header):
        """Parsing rdns from Received header

        :param header: The received header without the 'from ' at the begin
        :return: rdns if is found if not it returns an empty string
        """
        rdns = ReceivedParser._guess_rdns(header)
        # An e-mail address is not a host name.
        if "@" in rdns:
            rdns = ""
        if '(Postfix)' in header:
            match = UNKNOWN_RE_RDNS.match(header)
            if match:
                # Group 2 is the literal "unknown"; it is discarded again by
                # the 'unknown' check below, leaving rdns empty.
                rdns = match.group(2)
        # A bracketed IP address is not a reverse-DNS name either.
        if RDNS_IP_RE.match(rdns):
            rdns = ""
        if 'unknown' in rdns or rdns == 'UnknownHost':
            rdns = ""
        return rdns.strip("[]")

    @staticmethod
    def get_ip(header):
        """Parsing the relay ip address from Received header

        Only the part of the header before the first " by " is inspected.
        The last non-private address found there wins; when every address
        found is private, the first of them is returned.

        :param header: The received header without the 'from ' at the begin
        :return: ip address if is found if not it returns an empty string
        """
        ip = ""
        found = IP_ADDRESS.findall(header.split(" by ")[0])
        private_ips = list()
        for item in found:
            clean_ip = item.strip("[ ]();\n").lower()
            # Drop the "IPv6:" prefix and the IPv4-mapped "::ffff:" prefix.
            clean_ip = clean_ip.replace('ipv6:', '').replace('::ffff:', '')
            if IP_PRIVATE.search(clean_ip):
                private_ips.append(clean_ip)
            else:
                ip = clean_ip
        if len(found) != 0 and len(private_ips) == len(found):
            ip = private_ips[0]
        return ip

    @staticmethod
    def get_by(header):
        """Parsing the relay server from Received header

        :param header: The received header without the 'from ' at the begin
        :return: by if is found if not it returns an empty string
        """
        match = RDNS_RE3.match(header)
        if match:
            # "<name> [<ip>] by <host> [<ip>]": the second IP is reported.
            return match.group(4)
        match = BY_RE.match(header)
        return match.group(1) if match else ""

    @staticmethod
    def get_helo(header):
        """Parsing the helo server from Received header

        :param header: The received header without the 'from ' at the begin
        :return: helo if is found if not it returns an empty string
        """
        # Provisional value; it is only returned when none of the layouts
        # tried further down applies.
        helo = ""
        match = HELO_RE2.match(header)
        if match:
            helo = match.group(1)
            if helo == 'unknown':
                helo = ""
        match = HELO_RE7.match(header)
        if match:
            helo = match.group(1)

        # From here on the first layout that matches decides the result.
        match = HELO_RE.match(header)
        if match:
            return match.group(1)
        match = HELO_RE3.match(header)
        if match:
            return match.group(1).strip("[ ]();\n")
        for pattern in (HELO_RE4, HELO_RE5, HELO_RE6, HELO_RE8, HELO_RE9):
            match = pattern.match(header)
            if match:
                return match.group(1)
        match = RDNS_RE4.match(header)
        if match:
            return match.group(2)
        match = HELO_RE10.match(header)
        if match:
            return match.group(2).strip("[]")
        return helo

    @staticmethod
    def get_ident(header):
        """Parsing the ident from Received header

        :param header: The received header without the 'from ' at the begin
        :return: ident if is found if not it returns an empty string
        """
        for pattern in (IDENT_RE, IDENT_RE2):
            match = pattern.match(header)
            if match:
                return match.group(1)
        return ""

    @staticmethod
    def get_id(header):
        """Parsing the id of the relay from Received header

        :param header: The received header without the 'from ' at the begin
        :return: id if is found if not it returns an empty string
        """
        match = ID_RE.match(header)
        if not match:
            return ""
        return match.group(1).strip("<>")

    @staticmethod
    def get_auth(header):
        """Parsing the authentication from Received header

        The checks are ordered; the first one that applies decides the result.

        :param header: The received header without the 'from ' at the begin
        :return: auth if is found if not it returns an empty string
        """
        if ' by ' in header:
            match = AUTH_RE.match(header)
            if match:
                return match.group(1)
        if AUTH_RE3.search(header):
            return 'Postfix'
        if ' by mx.google.com with ESMTPS id ' in header:
            match = AUTH_VC_RE.match(header)
            if not match:
                # A GMail hop without TLS details is reported as no auth.
                return ""
            version, cipher = match.groups()
            return "GMail - transport={version} " \
                   "cipher={cipher}".format(version=version, cipher=cipher)
        if 'SquirrelMail authenticated ' in header:
            return "SquirrelMail"
        if 'authenticated' in header:
            if AUTH_RE2.search(header):
                return "CriticalPath"
            return "Sendmail"
        match = AUTH_RE4.search(header)
        if match:
            # Group 4 is the protocol, group 1 the GMX host name.
            return "GMX (%s / %s)" % (match.group(4), match.group(1))
        if AUTH_RE5.search(header):
            return "Communigate"
        return ""

    def _parse_message(self):
        """Fill ``self.received`` from ``self.received_headers``.

        Headers for which check_for_skip returns True are left out.  For an
        X-ORIGINATING-IP header only the IP address is extracted; all other
        fields are empty strings.
        """
        for header in self.received_headers:
            if self.check_for_skip(header):
                continue
            if header.startswith("X-ORIGINATING-IP"):
                self.received.append({
                    "rdns": "", "ip": self.get_ip(header), "by": "",
                    "helo": "", "ident": "", "id": "", "envfrom": "",
                    "auth": ""})
                continue
            self.received.append({
                "rdns": self.get_rdns(header),
                "ip": self.get_ip(header),
                "by": self.get_by(header),
                "helo": self.get_helo(header),
                "ident": self.get_ident(header),
                "id": self.get_id(header),
                "envfrom": self.get_envfrom(header),
                "auth": self.get_auth(header)})