yast/yast-yast2

View on GitHub
library/types/src/modules/URL.rb

Summary

Maintainability
D
2 days
Test Coverage
# ***************************************************************************
#
# Copyright (c) 2002 - 2012 Novell, Inc.
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.   See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, contact Novell, Inc.
#
# To contact Novell about this file by physical or electronic mail,
# you may find current contact information at www.novell.com
#
# ***************************************************************************
# File:  modules/URL.ycp
# Package:  yast2
# Summary:  Manipulate and Parse URLs
# Authors:  Michal Svec <msvec@suse.cz>
#    Anas Nashif <nashif@suse.cz>
# Flags:  Stable
#
# $Id$
require "yast"

module Yast
  # A module for dealing with URLs
  #
  # @note This is legacy code ported from the
  # {https://news.opensuse.org/2007/08/29/yast-independence-of-ycp/ YCP} era. Its use is highly
  # discouraged in favor of the {https://ruby-doc.org/stdlib-3.0.2/libdoc/uri/rdoc/URI.html URI}
  # standard library or the new {Y2Packager::ZyppUrl} (available from SLE-15-SP3 on) when working
  # with libzypp URLs.
  class URLClass < Module
    def main
      textdomain "base"

      Yast.import "Hostname"
      Yast.import "String"
      Yast.import "IP"
      Yast.import "URLRecode"

      # TODO: read URI(3), esp. compare the regex mentioned in the URI(3) with ours:
      #   my($scheme, $authority, $path, $query, $fragment) =
      #   $uri =~ m|^(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;

      # Valid characters in URL
      #
      # bnc#694582 - added @ as it is allowed in authority part of URI.
      # for details see RFC2616 and RFC2396
      @ValidChars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:_-/%@"

      # Builds a map from each given character to its lowercase percent-encoded
      # form (e.g. ";" => "%3b"), keeping the order of the characters.
      # Every call creates fresh strings, so the tables do not share values.
      percent_table = lambda do |reserved|
        reserved.each_char.map { |char| [char, format("%%%02x", char.ord)] }.to_h
      end

      # The three transform maps below never contain '%' because this character
      # must be processed in a particular order (the first or the last).

      # (Un)escaping of the username/password part of an URL
      @transform_map_passwd = percent_table.call(";/?:@&=+$, ")

      # (Un)escaping of the file location part of an URL ("/" is kept as is)
      @transform_map_filename = percent_table.call(";?:@&=+$, ")

      # (Un)escaping of the query part of an URL (":", "&" and "=" are kept as is)
      @transform_map_query = percent_table.call(";?@+$, ")
    end

    # Unescape reserved characters in string used as a part of URL (e.g. '%25' => '%', '%40' => '@'...)
    #
    # @param [String] in input string to unescape
    # @param transformation map
    # @return [String] unescaped string
    #
    # @example
    #  URL::UnEscapeString ("http%3a%2f%2fsome.nice.url%2f%3awith%3a%2f%24p#ci%26l%2fch%40rs%2f", URL::transform_map_passwd)
    #    -> http://some.nice.url/:with:/$p#ci&l/ch@rs/

    def UnEscapeString(in_, transform)
      escapes = deep_copy(transform)
      return "" if in_.nil? || in_ == ""

      decoded = in_

      # The map is "character => escape sequence"; apply it backwards and accept
      # the escape sequence in both lower and upper case (e.g. "%3a" and "%3A").
      Builtins.foreach(escapes) do |plain, escaped|
        [Builtins.tolower(escaped), Builtins.toupper(escaped)].each do |variant|
          decoded = String.Replace(decoded, variant, plain)
        end
      end

      # "%" goes last, otherwise a literal "%25xx" would be unescaped twice
      String.Replace(decoded, "%25", "%")
    end

    # Escape reserved characters in string used as a part of URL (e.g. '%' => '%25', '@' => '%40'...)
    #
    # @param [String] in input string to escape
    # @param transformation map
    # @return [String] escaped string
    #
    # @example
    #  URL::EscapeString ("http://some.nice.url/:with:/$p#ci&l/ch@rs/", URL::transform_map_passwd)
    #    -> http%3a%2f%2fsome.nice.url%2f%3awith%3a%2f%24p#ci%26l%2fch%40rs%2f

    def EscapeString(in_, transform)
      escapes = deep_copy(transform)
      return "" if in_.nil? || in_ == ""

      # Replaces every occurrence of `from` in `text` with `to`
      swap = lambda do |text, from, to|
        Builtins.mergestring(Builtins.splitstring(text, from), to)
      end

      # "%" has to be escaped first, the escape sequences added below contain it
      encoded = swap.call(in_, "%", "%25")

      # then the remaining reserved characters
      Builtins.foreach(escapes) do |plain, escaped|
        encoded = swap.call(encoded, plain, escaped)
      end

      encoded
    end

    # Tokenize URL
    # @param [String] url URL to be parsed
    # @return URL split to tokens
    # @example Parse("http://name:pass@www.suse.cz:80/path/index.html?question#part") ->
    #     $[
    #         "scheme"  : "http",
    #         "host"    : "www.suse.cz",
    #         "port"    : "80",
    #         "path"    : "/path/index.html",
    #         "user"    : "name",
    #         "pass"    : "pass",
    #         "query"   : "question",
    #         "fragment": "part"
    #     ]
    def Parse(url)
      Builtins.y2debug("url=%1", url)

      # We don't parse empty URLs
      return {} if url.nil? || Ops.less_than(Builtins.size(url), 1)

      # First pass - basic URL parts: scheme://host/path?question#part
      # Indexes of the resulting tokens:
      #   0,1: http://
      #   2: user:pass@www.suse.cz:23
      #   3: /some/path
      #   4,5: ?question
      #   6,7: #fragment
      url_regexp = "^" \
                   "(([^:/?#]+):[/]{0,2})?" \
                   "([^/?#]*)?" \
                   "([^?#]*)?" \
                   "(\\?([^#]*))?" \
                   "(#(.*))?"
      rawtokens = Builtins.regexptokenize(url, url_regexp)
      Builtins.y2debug("rawtokens=%1", rawtokens)

      scheme = Ops.get_string(rawtokens, 1, "")
      authority = Ops.get_string(rawtokens, 2, "")
      pth = Ops.get_string(rawtokens, 3, "")

      # FTP paths: a leading "/%2f" marks an absolute path, otherwise the
      # leading slash is only a separator and gets dropped
      if scheme == "ftp"
        pth = if Builtins.substring(pth, 0, 4) == "/%2f"
          Ops.add("/", Builtins.substring(pth, 4))
        elsif pth != ""
          Builtins.substring(pth, 1)
        else
          pth
        end
      end

      tokens = {
        "scheme"   => scheme,
        "path"     => URLRecode.UnEscape(pth),
        "query"    => URLRecode.UnEscape(Ops.get_string(rawtokens, 5, "")),
        "fragment" => URLRecode.UnEscape(Ops.get_string(rawtokens, 7, ""))
      }

      # Second pass - split the authority part: username:pass@host:port
      # Indexes of the resulting tokens:
      #   0,1,2,3: user:pass@
      #   4,5: hostname
      #   6,7: port
      # FIXME: the host part should also accept "[xxx]":
      #   "(([^:@]+)|(\\[([^]]+)\\]))"
      userpass = Builtins.regexptokenize(
        authority,
        "^" \
        "(([^@:]+)(:([^@:]+))?@)?" \
        "(([^:@]+))" \
        "(:([^:@]+))?"
      )
      Builtins.y2debug("userpass=%1", userpass)

      tokens["user"] = URLRecode.UnEscape(Ops.get_string(userpass, 1, ""))
      tokens["pass"] = URLRecode.UnEscape(Ops.get_string(userpass, 3, ""))

      port = Ops.get_string(userpass, 7, "")
      host = Ops.get_string(userpass, 5, "")
      tokens["port"] = port
      # without a host token the port token is used as the host
      tokens["host"] = (host == "") ? port : host

      # what remains of the authority part after "user:pass@"
      hostport6 = Builtins.substring(
        authority,
        Builtins.size(Ops.get_string(userpass, 0, ""))
      )
      Builtins.y2debug("hostport6: %1", hostport6)

      # check if there is an IPv6 address (enclosed in square brackets)
      host6 = Builtins.regexpsub(hostport6, "^\\[(.*)\\]", "\\1")

      if !host6.nil? && host6 != ""
        Builtins.y2milestone("IPv6 host detected: %1", host6)
        tokens["host"] = host6
        port6 = Builtins.regexpsub(hostport6, "^\\[.*\\]:(.*)", "\\1")
        Builtins.y2debug("port: %1", port6)
        tokens["port"] = port6 || ""
      end

      # some exceptions for samba scheme (there is optional extra option "domain")
      if ["samba", "smb"].include?(scheme)
        # NOTE: CUPS uses different URL syntax for Samba printers:
        #     smb://username:password@workgroup/server/printer
        # Fortunately yast2-printer does not use URL.ycp, so we can safely support libzypp syntax only:
        #     smb://username:passwd@servername/share/path/on/the/share?workgroup=mygroup
        options = MakeMapFromParams(Ops.get_string(tokens, "query", ""))

        tokens["domain"] = Ops.get(options, "workgroup", "") if Builtins.haskey(options, "workgroup")
      end

      # merge host and path if the scheme does not allow a host (bsc#991935)
      tokens = merge_host_and_path(tokens) if SCHEMES_WO_HOST.include?(scheme.downcase)

      Builtins.y2debug("tokens=%1", tokens)
      deep_copy(tokens)
    end

    # Check URL
    # @param [String] url URL to be checked
    # @return true if correct
    # @see RFC 2396 (updated by RFC 2732)
    # @see also perl-URI: URI(3)
    def Check(url)
      # Empty URLs and URLs containing spaces are never valid
      return false if url.nil? || Ops.less_than(Builtins.size(url), 1)
      return false if url.include?(" ")

      tokens = Parse(url)
      Builtins.y2debug("tokens: %1", tokens)

      scheme = Ops.get_string(tokens, "scheme", "")
      host = Ops.get_string(tokens, "host", "")
      port = Ops.get_string(tokens, "port", "")

      # "scheme" (e.g. "http"): letters only, may be empty
      return false unless Builtins.regexpmatch(scheme, "^[[:alpha:]]*$")

      # "host" (e.g. "www.suse.cz"): a fully qualified hostname, an IP address or empty
      return false unless Hostname.CheckFQ(host) || IP.Check(host) || host == ""

      # "port" (e.g. "80"): digits only, may be empty
      return false unless Builtins.regexpmatch(port, "^[0-9]*$")

      # "path", "user", "pass", "query" and "fragment" are not checked

      true
    end

    # Build URL from tokens as parsed with Parse
    # @param map token as returned from Parse
    # @return [String] url, empty string if invalid data is used to build the url.
    # @see RFC 2396 (updated by RFC 2732)
    # @see also perl-URI: URI(3)
    def Build(tokens)
      parts = deep_copy(tokens)
      Builtins.y2debug("URL::Build(): input: %1", parts)

      scheme = Ops.get_string(parts, "scheme", "")
      user = Ops.get_string(parts, "user", "")
      password = Ops.get_string(parts, "pass", "")
      host = Ops.get_string(parts, "host", "")
      port = Ops.get_string(parts, "port", "")
      domain = Ops.get_string(parts, "domain", "")
      fragment = Ops.get_string(parts, "fragment", "")

      # scheme: used only if it consists of letters
      url = Builtins.regexpmatch(scheme, "^[[:alpha:]]*$") ? scheme : ""
      Builtins.y2debug("url: %1", url)

      # "user[:pass]@" - the password is added only when there is a user name
      userpass = ""
      if user != ""
        userpass = URLRecode.EscapePassword(user)
        Builtins.y2milestone("Escaped username '%1' => '%2'", user, userpass)
      end
      if Builtins.size(userpass) != 0 && password != ""
        userpass = Builtins.sformat("%1:%2", userpass, URLRecode.EscapePassword(password))
      end
      userpass = Ops.add(userpass, "@") if Ops.greater_than(Builtins.size(userpass), 0)

      url = Builtins.sformat("%1://%2", url, userpass)
      Builtins.y2debug("url: %1", url)

      # host: only a fully qualified hostname or an IP address is added
      if Hostname.CheckFQ(host) || IP.Check(host)
        # enclose an IPv6 address in square brackets
        host_format = IP.Check6(host) ? "%1[%2]" : "%1%2"
        url = Builtins.sformat(host_format, url, host)
      end
      Builtins.y2debug("url: %1", url)

      # port: digits only
      url = Builtins.sformat("%1:%2", url, port) if Builtins.regexpmatch(port, "^[0-9]*$") && port != ""
      Builtins.y2debug("url: %1", url)

      path = Ops.get_string(parts, "path", "")
      if path != ""
        if Builtins.regexpmatch(path, "^/")
          # path is not empty and starts with "/": squeeze the leading slashes into one
          path = Builtins.substring(path, 1) while Builtins.substring(path, 0, 2) == "//"

          escaped_path = URLRecode.EscapePath(path)
          url = if scheme == "ftp"
            # in FTP URLs the leading slash of an absolute path is written as "/%2f"
            Builtins.sformat("%1/%%2f%2", url, Builtins.substring(escaped_path, 1))
          else
            Builtins.sformat("%1%2", url, escaped_path)
          end
        else
          # path is not empty and doesn't start with "/"
          url = Builtins.sformat("%1/%2", url, URLRecode.EscapePath(path))
        end
      end
      Builtins.y2debug("url: %1", url)

      # smb: make sure the "workgroup" query option matches the "domain" token
      query_map = MakeMapFromParams(Ops.get_string(parts, "query", ""))
      if scheme == "smb" &&
          Ops.greater_than(Builtins.size(domain), 0) &&
          Ops.get(query_map, "workgroup", "") != domain
        Ops.set(query_map, "workgroup", domain)
        Ops.set(parts, "query", MakeParamsFromMap(query_map))
      end

      query = Ops.get_string(parts, "query", "")
      url = Builtins.sformat("%1?%2", url, URLRecode.EscapeQuery(query)) if query != ""

      url = Builtins.sformat("%1#%2", url, URLRecode.EscapePassword(fragment)) if fragment != ""
      Builtins.y2debug("url: %1", url)

      # an invalid result is never returned
      unless Check(url)
        Builtins.y2error("Invalid URL: %1", url)
        return ""
      end

      Builtins.y2debug("URL::Build(): result: %1", url)

      url
    end

    # Format URL - truncate the middle part of the directory to fit to the requested length.
    #
    # Elements in the middle of the path specification are replaced by ellipsis (...).
    # The result might be longer than the requested size if the other URL parts are longer
    # than the requested size.
    # If the requested size is greater than the size of the full URL then the full URL is returned.
    # Only the path element of the URL is changed, the other parts are not modified
    # (e.g. protocol name).
    #
    # @example FormatURL("http://download.opensuse.org/very/log/path/which/will/be/truncated/target_file", 45)
    #     -> "http://download.opensuse.org/.../target_file"
    # @example FormatURL("http://download.opensuse.org/very/log/path/which/will/be/truncated/target_file", 60)
    #     -> "http://download.opensuse.org/very/.../be/truncated/target_file"
    #
    # @param tokens parsed URL
    # @see Parse should be used to convert URL string to a map (tokens parameter)
    # @param len requested maximum length of the output string
    # @return string Truncated URL
    def FormatURL(tokens, len)
      parts = deep_copy(tokens)
      full_url = Build(parts)

      # the full URL already fits, nothing to truncate
      return full_url if Ops.less_or_equal(Builtins.size(full_url), len)

      # too long: only the path may be shortened, so find out how much room
      # is left for it once all the other parts are in place
      long_path = Ops.get_string(parts, "path", "")
      Ops.set(parts, "path", "")
      room_for_path = Ops.subtract(len, Builtins.size(Build(parts)))

      # drop the middle of the path and rebuild the URL around the shortened one
      Ops.set(parts, "path", String.FormatFilename(long_path, room_for_path))
      Build(parts)
    end

    # y2milestone("%1", Parse("http://a:b@www.suse.cz:33/ahoj/nekde?neco#blah"));
    # y2milestone("%1", Parse("ftp://www.suse.cz/ah"));
    # y2milestone("%1", Parse("ftp://www.suse.cz:22/ah"));
    # y2milestone("%1", Parse("www.suse.cz/ah"));
    #
    # y2milestone("%1", Check("http://a:b@www.suse.cz:33/ahoj/nekde?neco#blah"));
    # y2milestone("%1", Check("ftp://www.suse.cz/ah"));
    # y2milestone("%1", Check("ftp://www.suse.cz:22/ah"));
    # y2milestone("%1", Check("www.suse.cz/ah"));
    # y2milestone("%1", Check("www.suse.cz ah"));
    # y2milestone("%1", Check(""));
    # y2milestone("%1", Check(nil));

    # Reads list of HTTP params and returns them as map.
    # (Useful also for cd:/, dvd:/, nfs:/ ... params)
    # Neither keys nor values are HTML-unescaped, see UnEscapeString().
    #
    # @param [String] params
    # @return [Hash{String => String}] params
    #
    # @example
    #      MakeMapFromParams ("device=sda3&login=aaa&password=bbb") -> $[
    #              "device"   : "sda3",
    #              "login"    : "aaa",
    #              "password" : "bbb"
    #      ]
    def MakeMapFromParams(params)
      # nil is an error, an empty string simply means "no params"
      if params.nil?
        Builtins.y2error("Erroneous (nil) params!")
        return nil
      end
      return {} if params == ""

      # "a=1&&b" -> ["a=1", "b"], empty chunks are dropped
      chunks = Builtins.filter(Builtins.splitstring(params, "&")) do |chunk|
        !chunk.nil? && chunk != ""
      end

      parsed = {}

      Builtins.foreach(chunks) do |chunk|
        separator_at = Builtins.search(chunk, "=")

        if separator_at.nil?
          # a param without "=" gets an empty value
          Ops.set(parsed, chunk, "")
        else
          # split at the first "=", any further "=" belongs to the value
          name = Builtins.substring(chunk, 0, separator_at)
          content = Builtins.substring(chunk, Ops.add(separator_at, 1))
          Ops.set(parsed, name, content)
        end
      end

      deep_copy(parsed)
    end

    # Returns string made of HTTP params. It's a reverse function to MakeMapFromParams().
    # Neither keys nor values are HTML-escaped, use EscapeString() if needed.
    #
    # @param map <string, string>
    #
    # @see #MakeMapFromParams
    #
    # @example
    #   MakeParamsFromMap ($[
    #     "param1" : "a",
    #     "param2" : "b",
    #     "param3" : "c",
    #   ]) -> "param1=a&param2=b&param3=c"
    def MakeParamsFromMap(params_map)
      # Builds a "key1=value1&key2=value2" string from the given map.
      # A nil value is written as an empty value (with a warning). An entry
      # with a nil or empty key is logged and left out of the result.
      params_map = deep_copy(params_map)

      # ["key" : "value", ...] -> ["key=value", ...]
      pairs = Builtins.maplist(params_map) do |key, value|
        if value.nil?
          Builtins.y2warning("Empty value for key %1", key)
          value = ""
        end
        if key.nil? || key == ""
          Builtins.y2error("Empty key (will be skipped)")
          # placeholder for the skipped entry, removed below
          next ""
        end
        # "key=value"
        Builtins.sformat("%1=%2", key, value)
      end

      # Really skip the entries with an empty key. A valid entry always contains
      # "=" so it is never empty; keeping the placeholders would leave stray
      # separators in the result (e.g. "&a=b" or "a=b&&c=d").
      pairs = Builtins.filter(pairs) { |pair| pair != "" }

      # ["key1=value1", "key2=value2", ...] -> "key1=value1&key2=value2"
      Builtins.mergestring(pairs, "&")
    end

    # Hide password in an URL - replaces the password in the URL by 'PASSWORD' string.
    # If there is no password in the URL the original URL is returned.
    # It should be used when an URL is logged to y2log or when it is displayed to user.
    # @param [String] url original URL
    # @return [String] new URL with 'PASSWORD' password or unmodified URL if there is no password
    def HidePassword(url)
      # Url::Build(Url::Parse) transforms the URL too much, see #247249#c41,
      # so the password is masked directly in the string:
      # ://user:password@ becomes ://user:PASSWORD@
      masked = Builtins.regexpsub(
        url,
        "(.*)(://[^/:]*):[^/@]*@(.*)",
        "\\1\\2:PASSWORD@\\3"
      )

      # no substitution result means there was no password to hide
      masked || url
    end

    # Hide password token in parsed URL (by URL::Parse()) - the password is replaced by 'PASSWORD' string.
    # Similar to HidePassword() but uses a parsed URL as the input.
    # @param [Hash] tokens input
    # @return [Hash] map with replaced password
    def HidePasswordToken(tokens)
      # work on a private copy, the caller's map is left untouched
      masked = deep_copy(tokens)

      # only a non-empty password is replaced
      has_password = Builtins.haskey(masked, "pass") &&
        Ops.greater_than(Builtins.size(Ops.get_string(masked, "pass", "")), 0)
      Ops.set(masked, "pass", "PASSWORD") if has_password

      masked
    end

    # Published interface of the module: the variables set in #main and the
    # functions defined above, each with its type signature in YCP notation.
    publish variable: :ValidChars, type: "string"
    publish variable: :transform_map_passwd, type: "map <string, string>"
    publish variable: :transform_map_filename, type: "map <string, string>"
    publish variable: :transform_map_query, type: "map <string, string>"
    publish function: :UnEscapeString, type: "string (string, map <string, string>)"
    publish function: :EscapeString, type: "string (string, map <string, string>)"
    publish function: :MakeMapFromParams, type: "map <string, string> (string)"
    publish function: :MakeParamsFromMap, type: "string (map <string, string>)"
    publish function: :Parse, type: "map (string)"
    publish function: :Check, type: "boolean (string)"
    publish function: :Build, type: "string (map)"
    publish function: :FormatURL, type: "string (map, integer)"
    publish function: :HidePassword, type: "string (string)"
    publish function: :HidePasswordToken, type: "map (map)"

  private

    # Schemes which should not include a host. Should be kept in sync with libzypp.
    # @see https://github.com/openSUSE/libzypp/blob/d9c97b883ac1561225c4d728a5f6c8a34498d5b9/zypp/Url.cc#L184-L190
    # @see #merge_host_and_path
    SCHEMES_WO_HOST = %w[cd dvd hd iso dir].freeze

    # Merges host and path tokens
    #
    # In schemes like 'cd' or 'dvd' the host part is not allowed.
    # It leads to conversions like: "cd:/?device=/dev/sr0" to "cd://?device=/dev/sr0"
    # or "cd:/info" to "cd://info".
    #
    # If no host or path are specified, the path is set to "/".
    #
    # @param  [Hash<String,String>] URL tokens
    # @return [Hash<String,String>] URL tokens with host and path merged
    def merge_host_and_path(tokens)
      # only the non-empty pieces take part in the new path
      segments = tokens.values_at("host", "path").reject(&:empty?)

      # the result is always absolute; with no segments at all it is just "/"
      merged_path = File.join("/", *segments)

      # returns a new map, the given one is not modified
      tokens.merge("path" => merged_path, "host" => "")
    end
  end

  # The module is used through this single instance; calling #main right away
  # sets the text domain, imports the required modules and fills in the
  # published variables (ValidChars and the transform maps).
  URL = URLClass.new
  URL.main
end