rakelib/extensions.rake
# frozen_string_literal: true
require "rbconfig"
require "shellwords"
# A cross-compilation target: a Ruby version string paired with a gem platform string.
#
# NOTE: the *_PLATFORM_REGEX constants are assigned inside the Struct.new block. Constant
# assignment is lexically scoped, so they are defined in the enclosing scope rather than
# under CrossRuby.
CrossRuby = Struct.new(:version, :platform) do
  WINDOWS_PLATFORM_REGEX = /mingw|mswin/
  MINGWUCRT_PLATFORM_REGEX = /mingw-ucrt/
  MINGW32_PLATFORM_REGEX = /mingw32/
  LINUX_PLATFORM_REGEX = /linux/
  X86_LINUX_PLATFORM_REGEX = /x86.*linux/
  AARCH_LINUX_PLATFORM_REGEX = /aarch.*linux/
  ARM_LINUX_PLATFORM_REGEX = /arm-linux/
  DARWIN_PLATFORM_REGEX = /darwin/

  # True when the platform matches WINDOWS_PLATFORM_REGEX (mingw or mswin).
  def windows?
    !!(platform =~ WINDOWS_PLATFORM_REGEX)
  end

  # True when the platform matches LINUX_PLATFORM_REGEX.
  def linux?
    !!(platform =~ LINUX_PLATFORM_REGEX)
  end

  # True when the platform matches DARWIN_PLATFORM_REGEX.
  def darwin?
    !!(platform =~ DARWIN_PLATFORM_REGEX)
  end

  # The version up to (not including) the first "-", e.g. "3.2.0" for "3.2.0-rc1".
  def ver
    @ver ||= version[/\A[^-]+/]
  end

  # The "major.minor" prefix of #ver (single digits each, followed by a "."),
  # or nil when #ver does not have that shape.
  def minor_ver
    @minor_ver ||= ver[/\A\d\.\d(?=\.)/]
  end

  # The suffix used in libruby DLL names: #minor_ver without the dot, plus "0"
  # (e.g. "320" for "3.2").
  #
  # Raises RuntimeError when #minor_ver could not be determined.
  def api_ver_suffix
    raise "CrossRuby.api_ver_suffix: unsupported version: #{ver}" if minor_ver.nil?

    "#{minor_ver.delete(".")}0"
  end

  # The host triple for this platform.
  #
  # Raises RuntimeError for platforms without an entry.
  # NOTE(review): there is no "arm-linux" entry here although #tool and
  # #target_file_format handle that platform -- confirm whether one is needed.
  def host
    @host ||= case platform
    when "x64-mingw-ucrt", "x64-mingw32"
      "x86_64-w64-mingw32"
    when "x86-mingw32"
      "i686-w64-mingw32"
    when "x86_64-linux"
      "x86_64-linux-gnu"
    when "x86-linux"
      "i686-linux-gnu"
    when "aarch64-linux"
      "aarch64-linux"
    when "x86_64-darwin"
      "x86_64-darwin"
    when "arm64-darwin"
      "aarch64-darwin"
    else
      raise "CrossRuby.host: unsupported platform: #{platform}"
    end
  end

  # The platform-prefixed name of a toolchain program, e.g. tool("objdump") returns
  # "x86_64-w64-mingw32-objdump" for "x64-mingw32".
  #
  # Raises RuntimeError for platforms without an entry.
  def tool(name)
    @binutils_prefix ||= case platform
    when "x64-mingw-ucrt", "x64-mingw32"
      "x86_64-w64-mingw32-"
    when "x86-mingw32"
      "i686-w64-mingw32-"
    when "x86_64-linux"
      "x86_64-redhat-linux-gnu-"
    when "x86-linux"
      "i686-redhat-linux-gnu-"
    when "aarch64-linux"
      "aarch64-linux-gnu-"
    when "x86_64-darwin"
      "x86_64-apple-darwin-"
    when "arm64-darwin"
      "aarch64-apple-darwin-"
    when "arm-linux"
      "arm-linux-gnueabihf-"
    else
      raise "CrossRuby.tool: unmatched platform: #{platform}"
    end

    @binutils_prefix + name
  end

  # The string expected after "file format" in the dump of a library built for this
  # platform (verify_dll matches against it).
  #
  # Raises RuntimeError for platforms without an entry.
  def target_file_format
    case platform
    when "x64-mingw-ucrt", "x64-mingw32"
      "pei-x86-64"
    when "x86-mingw32"
      "pei-i386"
    when "x86_64-linux"
      "elf64-x86-64"
    when "x86-linux"
      "elf32-i386"
    when "aarch64-linux"
      "elf64-littleaarch64"
    when "x86_64-darwin"
      "Mach-O 64-bit x86-64" # hmm
    when "arm64-darwin"
      "Mach-O arm64"
    when "arm-linux"
      "elf32-littlearm"
    else
      raise "CrossRuby.target_file_format: unmatched platform: #{platform}"
    end
  end

  # File extension of the compiled extension: "bundle" on darwin, "so" elsewhere.
  def dll_ext
    darwin? ? "bundle" : "so"
  end

  # Path of the staged extension library for this target (reads NOKOGIRI_SPEC.name).
  def dll_staging_path
    "tmp/#{platform}/stage/lib/#{NOKOGIRI_SPEC.name}/#{minor_ver}/#{NOKOGIRI_SPEC.name}.#{dll_ext}"
  end

  # File name of the libruby DLL for the Windows platforms.
  #
  # Raises RuntimeError for any other platform.
  def libruby_dll
    case platform
    when "x64-mingw-ucrt"
      "x64-ucrt-ruby#{api_ver_suffix}.dll"
    when "x64-mingw32"
      "x64-msvcrt-ruby#{api_ver_suffix}.dll"
    when "x86-mingw32"
      "msvcrt-ruby#{api_ver_suffix}.dll"
    else
      raise "CrossRuby.libruby_dll: unmatched platform: #{platform}"
    end
  end

  # The shared libraries the compiled extension is allowed to import on this platform.
  #
  # Raises RuntimeError for platforms matching none of the platform regexes.
  def allowed_dlls
    case platform
    when MINGW32_PLATFORM_REGEX
      [
        "kernel32.dll",
        "msvcrt.dll",
        "ws2_32.dll",
        "user32.dll",
        "advapi32.dll",
        libruby_dll,
      ]
    when MINGWUCRT_PLATFORM_REGEX
      [
        "kernel32.dll",
        "ws2_32.dll",
        "advapi32.dll",
        "api-ms-win-crt-convert-l1-1-0.dll",
        "api-ms-win-crt-environment-l1-1-0.dll",
        "api-ms-win-crt-filesystem-l1-1-0.dll",
        "api-ms-win-crt-heap-l1-1-0.dll",
        "api-ms-win-crt-locale-l1-1-0.dll",
        "api-ms-win-crt-math-l1-1-0.dll",
        "api-ms-win-crt-private-l1-1-0.dll",
        "api-ms-win-crt-runtime-l1-1-0.dll",
        "api-ms-win-crt-stdio-l1-1-0.dll",
        "api-ms-win-crt-string-l1-1-0.dll",
        "api-ms-win-crt-time-l1-1-0.dll",
        "api-ms-win-crt-utility-l1-1-0.dll",
        libruby_dll,
      ]
    when X86_LINUX_PLATFORM_REGEX
      [
        "libm.so.6",
        "libc.so.6",
        "libdl.so.2", # on old dists only - now in libc
      ].tap do |dlls|
        dlls << "libpthread.so.0" if libpthread_allowed?
      end
    when AARCH_LINUX_PLATFORM_REGEX
      [
        "libm.so.6",
        "libc.so.6",
        "libdl.so.2", # on old dists only - now in libc
        "ld-linux-aarch64.so.1",
      ].tap do |dlls|
        dlls << "libpthread.so.0" if libpthread_allowed?
      end
    when DARWIN_PLATFORM_REGEX
      [
        "/usr/lib/libSystem.B.dylib",
        "/usr/lib/liblzma.5.dylib",
        "/usr/lib/libobjc.A.dylib",
      ]
    when ARM_LINUX_PLATFORM_REGEX
      [
        "libm.so.6",
        "libdl.so.2",
        "libc.so.6",
        "ld-linux-armhf.so.3",
      ].tap do |dlls|
        dlls << "libpthread.so.0" if libpthread_allowed?
      end
    else
      raise "CrossRuby.allowed_dlls: unmatched platform: #{platform}"
    end
  end

  # The versioned-symbol requirements (library name => version) the compiled
  # extension is expected to have on the Linux platforms.
  #
  # Raises RuntimeError for any other platform.
  def dll_ref_versions
    case platform
    when X86_LINUX_PLATFORM_REGEX
      { "GLIBC" => "2.17" }
    when AARCH_LINUX_PLATFORM_REGEX, ARM_LINUX_PLATFORM_REGEX
      { "GLIBC" => "2.29" }
    else
      raise "CrossRuby.dll_ref_versions: unmatched platform: #{platform}"
    end
  end

  # True when #ver is 3.2.0 or later. Compared as versions rather than strings, so
  # that e.g. "3.10.0" is correctly ordered after "3.2.0".
  def libpthread_allowed?
    Gem::Version.new(ver) >= Gem::Version.new("3.2.0")
  end
  private :libpthread_allowed?
end
# Parse .cross_rubies: every "<ruby version>:<platform>" line becomes a CrossRuby.
# Lines that do not match the pattern (blank lines, lines starting with "#") are skipped.
CROSS_RUBIES = File.read(".cross_rubies").split("\n").filter_map do |line|
  case line
  when /\A([^#]+):([^#]+)/
    # Strip surrounding whitespace (a trailing "\r" from a CRLF checkout, blanks before
    # an inline "#" comment) so the platform matches the exact strings CrossRuby expects.
    CrossRuby.new(Regexp.last_match(1).strip, Regexp.last_match(2).strip)
  end
end

# Export the distinct Ruby versions as a colon-separated list in RUBY_CC_VERSION.
# NOTE(review): this is set before rake_compiler_dock is required; presumably the
# ordering matters -- confirm before moving either line.
ENV["RUBY_CC_VERSION"] = CROSS_RUBIES.map(&:ver).uniq.join(":")
require "rake_compiler_dock"
# Whether the running interpreter reports a RUBY_PLATFORM containing "java".
def java?
  RUBY_PLATFORM.match?(/java/)
end
# Link +relative_source_path+ into the gem build directory (at the same relative
# location) and append it to NOKOGIRI_SPEC.files.
#
# Any file already present at the destination is removed first, and the destination
# directory is created when missing.
#
# Raises RuntimeError when the path is nil or does not exist.
def add_file_to_gem(relative_source_path)
  source_present = !relative_source_path.nil? && File.exist?(relative_source_path)
  raise "Cannot find file '#{relative_source_path}'" unless source_present

  destination = File.join(gem_build_path, relative_source_path)
  destination_dir = File.dirname(destination)

  mkdir_p(destination_dir) unless Dir.exist?(destination_dir)
  rm_f(destination) if File.exist?(destination)
  safe_ln(relative_source_path, destination)

  NOKOGIRI_SPEC.files << relative_source_path
end
# Directory under "pkg" named after NOKOGIRI_SPEC.full_name.
def gem_build_path
  package_dir = NOKOGIRI_SPEC.full_name
  File.join("pkg", package_dir)
end
# Run the target's +tool_name+ program (e.g. "objdump") with +flag+ against +dll+,
# with LANG=C in the environment, and return its standard output.
def verify_dll_tool_output(cross_ruby, tool_name, flag, dll)
  %x(#{["env", "LANG=C", cross_ruby.tool(tool_name), flag, dll].shelljoin})
end

# Raise unless +dump+ reports the file format expected for +cross_ruby+.
def verify_dll_file_format!(dump, cross_ruby, dll)
  return if /file format #{Regexp.quote(cross_ruby.target_file_format)}\s/.match?(dump)

  raise "unexpected file format for generated dll #{dll}"
end

# Raise unless every entry of +actual_imports+ is listed in +allowed_imports+.
def verify_dll_imports!(actual_imports, allowed_imports, dll)
  return if (actual_imports - allowed_imports).empty?

  raise "unallowed so imports #{actual_imports.inspect} in #{dll} (allowed #{allowed_imports.inspect})"
end

# Reduce [[lib, version], ...] pairs to a hash holding the highest version seen per
# library, like {"LIBUDEV"=>"183", "GLIBC"=>"2.17"}. Versions are compared component
# by component as integers.
def verify_dll_highest_ref_versions(ref_versions_data)
  components = ->(version) { version.split(".").map(&:to_i) }

  ref_versions_data.each_with_object({}) do |(lib, candidate), highest|
    current = highest[lib]
    if current.nil? || (components.call(candidate) <=> components.call(current)) > 0
      highest[lib] = candidate
    end
  end
end

# Sanity-check a compiled extension library for the given cross-ruby target.
#
# dll        - path of the compiled library to inspect.
# cross_ruby - the CrossRuby target the library was built for.
#
# For every platform family this checks the file format, that Init_nokogiri is
# exported, and that only allowed libraries are imported. On Linux the versioned symbol
# requirements must also equal cross_ruby.dll_ref_versions. On darwin any liblzma
# reference is first rewritten (via install_name_tool) to point into /usr/lib.
#
# Raises RuntimeError on the first failed check; prints a confirmation line otherwise.
def verify_dll(dll, cross_ruby)
  allowed_imports = cross_ruby.allowed_dlls

  if cross_ruby.windows?
    dump = verify_dll_tool_output(cross_ruby, "objdump", "-p", dll)

    verify_dll_file_format!(dump, cross_ruby, dll)
    raise "export function Init_nokogiri not in dll #{dll}" unless /Table.*\sInit_nokogiri\s/mi.match?(dump)

    # Verify that the DLL dependencies are all allowed.
    actual_imports = dump.scan(/DLL Name: (.*)$/).map { |name| name.first.downcase }.uniq
    verify_dll_imports!(actual_imports, allowed_imports, dll)

  elsif cross_ruby.linux?
    dump = verify_dll_tool_output(cross_ruby, "objdump", "-p", dll)
    nm = verify_dll_tool_output(cross_ruby, "nm", "-D", dll)

    verify_dll_file_format!(dump, cross_ruby, dll)
    raise "export function Init_nokogiri not in dll #{dll}" unless nm.include?(" T Init_nokogiri")

    # Verify that the DLL dependencies are all allowed.
    actual_imports = dump.scan(/NEEDED\s+(.*)/).map(&:first).uniq
    verify_dll_imports!(actual_imports, allowed_imports, dll)

    # Verify that the expected so version requirements match the actual dependencies.
    ref_versions_data = dump.scan(/0x[\da-f]+ 0x[\da-f]+ \d+ (\w+)_([\d\.]+)$/i)
    actual_ref_versions = verify_dll_highest_ref_versions(ref_versions_data)
    if actual_ref_versions != cross_ruby.dll_ref_versions
      raise "unexpected so version requirements #{actual_ref_versions.inspect} in #{dll}"
    end

  elsif cross_ruby.darwin?
    dump = verify_dll_tool_output(cross_ruby, "objdump", "-p", dll)
    nm = verify_dll_tool_output(cross_ruby, "nm", "-g", dll)

    verify_dll_file_format!(dump, cross_ruby, dll)
    raise "export function Init_nokogiri not in dll #{dll}" unless / T _?Init_nokogiri/.match?(nm)

    # if liblzma is being referenced, let's make sure it's referring
    # to the system-installed file and not the homebrew-installed file.
    ldd = verify_dll_tool_output(cross_ruby, "otool", "-L", dll)
    liblzma_refs = ldd.scan(/^\t([^ ]+) /).map(&:first).uniq.grep(/liblzma/)
    unless liblzma_refs.empty?
      liblzma_refs.each do |ref|
        new_ref = File.join("/usr/lib", File.basename(ref))
        sh(["env", "LANG=C", cross_ruby.tool("install_name_tool"), "-change", ref, new_ref, dll].shelljoin)
      end

      # reload! (only needed when a reference was actually rewritten)
      ldd = verify_dll_tool_output(cross_ruby, "otool", "-L", dll)
    end

    # Verify that the DLL dependencies are all allowed.
    actual_imports = ldd.scan(/^\t([^ ]+) /).map(&:first).uniq
    verify_dll_imports!(actual_imports, allowed_imports, dll)
  end

  puts "verify_dll: #{dll}: passed shared library sanity checks"
end
# For every cross-ruby target, define a task named after its staged library path whose
# action runs verify_dll on that path.
CROSS_RUBIES.each do |target|
  staged_library = target.dll_staging_path
  task staged_library do |staged_task| # rubocop:disable Rake/Desc
    verify_dll(staged_task.name, target)
  end
end
namespace "gem" do
  # Distinct platform names of the given cross-ruby targets, in first-seen order.
  platforms_of = ->(targets) { targets.map(&:platform).uniq }

  native_targets = CROSS_RUBIES.select { |cr| cr.windows? || cr.linux? || cr.darwin? }

  platforms_of.call(native_targets).each do |plat|
    # Host-side task: runs the per-platform "builder" task through RakeCompilerDock.
    desc "build native gem for #{plat} platform"
    task plat do
      RakeCompilerDock.sh(<<~EOT, platform: plat, verbose: true)
        ruby -v &&
        gem install bundler --no-document &&
        bundle &&
        bundle exec rake gem:#{plat}:builder MAKE='nice make -j`nproc`'
      EOT
    end

    namespace plat do
      desc "build native gem for #{plat} platform (guest container)"
      task "builder" do
        # use Task#invoke because the pkg/*gem task is defined at runtime
        gem_task_name = "pkg/#{NOKOGIRI_SPEC.full_name}-#{Gem::Platform.new(plat)}.gem"
        Rake::Task["native:#{plat}"].invoke
        Rake::Task[gem_task_name].invoke
      end
    end
  end

  desc "build a jruby gem"
  task "jruby" do
    RakeCompilerDock.sh(<<~EOF, rubyvm: "jruby", platform: "jruby", verbose: true)
      gem install bundler --no-document &&
      bundle &&
      bundle exec rake java gem
    EOF
  end

  # Aggregate tasks: each depends on the per-platform tasks of one platform family.
  desc "build native gems for windows"
  multitask "windows" => platforms_of.call(CROSS_RUBIES.select(&:windows?))

  desc "build native gems for linux"
  multitask "linux" => platforms_of.call(CROSS_RUBIES.select(&:linux?))

  desc "build native gems for darwin"
  multitask "darwin" => platforms_of.call(CROSS_RUBIES.select(&:darwin?))
end
# Define the extension build tasks: the Java extension when running on a "java"
# platform, the C extension (with cross-compilation support) otherwise.
if java?
  # append to the existing "java" task defined by rake-compiler
  task "java" do # rubocop:disable Rake/Desc
    # if we're building the java gem, don't build the vanilla gem (see rakelib/package.rake)
    Rake::Task["pkg/#{NOKOGIRI_SPEC.full_name}.gem"].clear
  end

  require "rake/javaextensiontask"
  Rake::JavaExtensionTask.new("nokogiri", NOKOGIRI_SPEC.dup) do |ext|
    # Keep the extension C files because they have docstrings (and Java files don't)
    ["ext/nokogiri/*.h", "gumbo-parser/**/*"].each do |excluded_pattern|
      ext.gem_spec.files.reject! { |path| File.fnmatch?(excluded_pattern, path) }
    end

    ext.ext_dir = "ext/java"
    ext.lib_dir = "lib/nokogiri"
    ext.source_version = "1.8"
    ext.target_version = "1.8"

    # every jar listed in the gemspec goes on the classpath
    jar_paths = ext.gem_spec.files.select { |path| File.fnmatch?("**/*.jar", path) }
    ext.classpath = jar_paths.join(":")
    ext.debug = true if ENV["JAVA_DEBUG"]
  end

  task gem_build_path => [:compile] do
    add_file_to_gem "lib/nokogiri/nokogiri.jar"
  end

  desc "Vendor java dependencies"
  task :vendor_jars do
    require "jars/installer"

    FileUtils.rm(FileList["lib/nokogiri/jruby/*/**/*.jar"], verbose: true)
    jars = Jars::Installer.vendor_jars!("lib/nokogiri/jruby")

    # map "group:artifact" => version, visiting the jars in gav order
    jar_dependencies = jars.sort_by(&:gav).each_with_object({}) do |jar, versions|
      group, artifact, version = jar.gav.split(":")
      versions[[group, artifact].join(":")] = version
    end

    # output this to try to minimize git merge conflicts going forward
    entries = jar_dependencies.map { |ga, v| " #{ga.inspect} => #{v.inspect},\n" }
    string_rep = "{\n#{entries.join} }"

    # the generated module is appended to the end of the existing file
    File.open("lib/nokogiri/jruby/nokogiri_jars.rb", "a") do |out|
      out.puts
      out.puts <<~EOF
        module Nokogiri
        # generated by the :vendor_jars rake task
        JAR_DEPENDENCIES = #{string_rep}.freeze
        XERCES_VERSION = JAR_DEPENDENCIES["xerces:xercesImpl"]
        NEKO_VERSION = JAR_DEPENDENCIES["net.sourceforge.htmlunit:neko-htmlunit"]
        end
      EOF
    end
  end
else
  require "rake/extensiontask"
  require "yaml"

  dependencies = YAML.load_file("dependencies.yml")

  # matches the Java sources and jars, which are left out of the C-extension gem
  java_artifact = ->(path) { File.fnmatch?("**/*.{java,jar}", path, File::FNM_EXTGLOB) }

  task gem_build_path do # rubocop:disable Rake/Desc
    NOKOGIRI_SPEC.files.reject!(&java_artifact)

    ["libxml2", "libxslt"].each do |lib|
      # the source archive for the version named in dependencies.yml
      version = dependencies[lib]["version"]
      archive_pattern = File.join("ports", "archives", "#{lib}-#{version}.tar.*")
      add_file_to_gem(Dir.glob(archive_pattern).first)

      # only patch files listed by `git ls-files` are packaged
      patchesdir = File.join("patches", lib)
      tracked_patches = %x(#{["git", "ls-files", patchesdir].shelljoin}).split("\n").grep(/\.patch\z/)
      tracked_patches.each { |patch| add_file_to_gem(patch) }

      # warn, when the process exits, about patch files on disk that were not listed
      untracked_patches = Dir[File.join(patchesdir, "*.patch")] - tracked_patches
      at_exit do
        untracked_patches.each do |patch|
          puts "** WARNING: untracked patch file not added to gem: #{patch}"
        end
      end
    end
  end

  Rake::ExtensionTask.new("nokogiri", NOKOGIRI_SPEC.dup) do |ext|
    ext.source_pattern = "*.{c,cc,cpp,h}"
    ext.gem_spec.files.reject!(&java_artifact)

    # FAT_DIR, when set, adds one more directory level under lib/nokogiri
    lib_dir_parts = ["lib", "nokogiri"]
    lib_dir_parts << ENV["FAT_DIR"] unless ENV["FAT_DIR"].nil?
    ext.lib_dir = File.join(*lib_dir_parts)

    extra_options = ENV["EXTOPTS"]
    ext.config_options << extra_options if extra_options

    ext.cross_compile = true
    ext.cross_platform = CROSS_RUBIES.map(&:platform).uniq
    ext.cross_config_options << "--enable-cross-build"

    ext.cross_compiling do |spec|
      ["ports/*", "gumbo-parser/**/*"].each do |excluded_pattern|
        spec.files.reject! { |path| File.fnmatch?(excluded_pattern, path) }
      end
      spec.dependencies.reject! { |dep| dep.name == "mini_portile2" }

      # when pre-compiling a native gem, package all the C headers sitting in ext/nokogiri/include
      # which were copied there in the $INSTALLFILES section of extconf.rb.
      # (see scripts/test-gem-file-contents and scripts/test-gem-installation for tests)
      headers_dir = "ext/nokogiri/include"
      missing_lib = ["libxml2", "libxslt"].find { |lib| !File.directory?(File.join(headers_dir, lib)) }
      raise "#{missing_lib} headers are not present in #{headers_dir}" if missing_lib

      Dir.glob(File.join(headers_dir, "**", "*.h")).each do |header|
        spec.files << header
      end
    end
  end
end