developer/bin/import_google_fonts
#!/usr/bin/env python3
#
# import_google_fonts
#
# Using Python rather than Ruby for the Protocol Buffer parser.
# https://github.com/protocolbuffers/protobuf/issues/6508#issuecomment-522165498
#
# To install dependencies:
#
# pip3 install gftools html2text jinja2 protobuf
from functools import reduce
from glob import glob
import os
import re
import sys
from google.protobuf import text_format
import gftools.fonts_public_pb2 as fonts_pb2
import jinja2
import html2text
import urllib.request
def parse_metadata(filename):
    """Parse a text-format protobuf metadata file into a ``FamilyProto``.

    Based off of
    https://github.com/googlefonts/gftools/blob/2bfd4acd402b353aaeb46b991e6cad855001e4c8/Lib/gftools/util/google_fonts.py

    Args:
        filename: Path to the metadata file (``METADATA.pb`` in this script).

    Returns:
        A ``fonts_pb2.FamilyProto`` populated from the file's contents.

    Raises:
        text_format.ParseError: Propagated from ``text_format.Merge`` when the
            file cannot be parsed (``run()`` in this file catches it).
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding:
    # the metadata may contain non-ASCII text, and the locale default is not
    # UTF-8 everywhere.
    with open(filename, encoding="utf-8") as f:
        text = f.read()
    meta = fonts_pb2.FamilyProto()
    text_format.Merge(text, meta)
    return meta
class FontCask:
    """Render a Homebrew cask definition for one font family.

    Instance attributes (all set in ``__init__``):
      folder: family folder path, interpolated into the cask URL/``only_path``.
      meta: family metadata; ``meta.name`` and ``meta.fonts[*].filename`` are read.
      desc: optional description text (may be replaced by callers).
      early_access: when true, the early-access homepage is used.
      homepage_override: when not ``None``, returned verbatim by ``homepage()``.
    """

    ENVIRONMENT = jinja2.Environment(
        keep_trailing_newline=True, trim_blocks=True, undefined=jinja2.StrictUndefined,
    )
    # Families with exactly one file get a direct raw-file URL; all others get
    # a git URL restricted to the family folder via ``only_path``.
    # NOTE(review): the template text is reproduced exactly as it appeared in
    # the reviewed source; confirm its leading whitespace before merging.
    TEMPLATE = ENVIRONMENT.from_string(
        """cask "{{token}}" do
version :latest
sha256 :no_check
{% if files|length == 1 %}
url "https://github.com/google/fonts/raw/main/{{folder}}/{{files[0] | urlencode}}"
{%- if not 'github.com/' in homepage %},
verified: "github.com/google/fonts/"
{%- endif +%}
{% else %}
url "https://github.com/google/fonts.git",
{%- if not 'github.com/' in homepage +%}
verified: "github.com/google/fonts",
{%- endif +%}
branch: "main",
only_path: "{{folder}}"
{% endif %}
name "{{font_name}}"
homepage "{{homepage}}"
{% for file in files %}
font "{{file}}"
{% endfor %}
# No zap stanza required
end
"""
    )

    def __init__(self, folder, meta, description=None, early_access=False):
        """Store the inputs used to render the cask.

        Args:
            folder: Family folder path used in the generated URL.
            meta: Metadata object exposing ``name`` and ``fonts``.
            description: Optional description text.
            early_access: Whether to use the early-access homepage.
        """
        self.folder = folder
        self.meta = meta
        self.desc = description
        self.early_access = early_access
        self.homepage_override = None

    def font_name(self):
        """Return the family name taken from the metadata."""
        return self.meta.name

    def description(self):
        """Return the description, or ``None`` when it is missing or empty."""
        # A falsy value (None or "") already covers the empty-string case.
        return self.desc or None

    def token(self):
        """Return the cask token: ``font-`` plus the lowercased, hyphenated name."""
        # https://github.com/Homebrew/homebrew-cask-fonts/blob/master/CONTRIBUTING.md#converting-the-canonical-name-to-a-token
        token = self.font_name().lower().replace(" ", "-")
        return f"font-{token}"

    def token_sharding_dir(self):
        """Return the shard directory name: ``font-`` plus the token's first letter."""
        # Strip only the leading prefix. Splitting on every "font-" breaks for
        # names that themselves start with "Font " (token "font-font-..."),
        # where the second piece is empty and indexing it raised IndexError.
        return "font-" + self.token().removeprefix("font-")[0]

    def dest_path(self):
        """Return the relative path of the cask file under ``Casks/font``."""
        return os.path.join("Casks", "font", f"{self.token_sharding_dir()}", f"{self.token()}.rb")

    def name_path(self):
        """Return the family name with spaces replaced by ``+`` for URLs."""
        return self.font_name().replace(" ", "+")

    def homepage(self):
        """Return the homepage URL, honouring an explicit override first."""
        if self.homepage_override is not None:
            return self.homepage_override
        if self.early_access:
            return "https://fonts.google.com/earlyaccess"
        return f"https://fonts.google.com/specimen/{self.name_path()}"

    def files(self):
        """Return the sorted list of font filenames from the metadata."""
        return sorted(font.filename for font in self.meta.fonts)

    def cask_content(self):
        """Render and return the cask file text from ``TEMPLATE``."""
        return self.TEMPLATE.render(
            token=self.token(),
            folder=self.folder,
            font_name=self.font_name(),
            desc=self.description(),
            homepage=self.homepage(),
            files=self.files(),
        )
def is_other_foundry(cask_path):
    """Tell whether the cask file at *cask_path* lacks a google/fonts GitHub URL.

    Args:
        cask_path: Path to an existing cask file.

    Returns:
        ``True`` when no ``url`` stanza pointing at
        ``https://github.com/google/fonts`` is found, ``False`` otherwise.
    """
    with open(cask_path) as cask_file:
        cask_source = cask_file.read()
    google_url = re.search(r"url ['\"]https://github.com/google/fonts", cask_source)
    return google_url is None
def should_skip(cask_path):
    """Decide whether an existing cask must be left untouched.

    Args:
        cask_path: Path where the cask would be written.

    Returns:
        ``True`` only when the file already exists and ``is_other_foundry``
        reports it as coming from another foundry; ``False`` otherwise.
    """
    # Nothing on disk yet: there is nothing to protect.
    if not os.path.exists(cask_path):
        return False
    if not is_other_foundry(cask_path):
        return False
    print("Other foundry:", cask_path)
    # don't overwrite it, per
    # https://github.com/Homebrew/homebrew-cask-fonts/blob/master/CONTRIBUTING.md#google-web-font-directory
    return True
def metadata_to_cask(meta_file, repo_dir):
    """Build a ``FontCask`` from a metadata file and its optional description.

    Args:
        meta_file: Path to the family's metadata file.
        repo_dir: Repository root; the cask folder is the metadata file's
            directory relative to it.

    Returns:
        A ``FontCask`` whose description is extracted from
        ``DESCRIPTION.en_us.html`` next to the metadata file, or ``None`` when
        that file is absent or no "<name> ... is ..." sentence is found.
    """
    folder = os.path.dirname(os.path.relpath(meta_file, start=repo_dir))
    meta = parse_metadata(meta_file)
    description_path = os.path.join(os.path.dirname(meta_file), "DESCRIPTION.en_us.html")
    description = None
    if os.path.exists(description_path):
        h2t = html2text.HTML2Text()
        h2t.ignore_links = True
        h2t.ignore_images = True
        h2t.ignore_tables = True
        h2t.ignore_emphasis = True
        # Decode explicitly so the result does not depend on the locale.
        with open(description_path, encoding="utf-8") as f:
            html = f.read()
        # Drop double quotes and collapse all whitespace to single spaces.
        contents = " ".join(h2t.handle(html).replace('"', '').split())
        # Match everything up to "<name> ... is [a|an|the] ". The article is
        # grouped behind one shared \s+ so that "is the" is consumed too; the
        # previous form "(?:\s+an?|the)" could never match "the" after a space.
        regex = r".*" + re.escape(meta.name) + r"\s+(?:.*\s+)?is(?:\s+(?:an?|the))?\s+"
        parts = re.split(regex, contents, maxsplit=1)
        if len(parts) > 1:
            # Keep the text up to the first period as the description.
            description = parts[1].split(".")[0].capitalize()
    return FontCask(folder, meta, description=description)
def write_cask(cask):
    """Write the rendered cask to its destination when something changed.

    Args:
        cask: Object providing ``dest_path()`` and ``cask_content()``.

    Returns:
        ``True`` when the file was written; ``False`` when the cask is skipped
        (see ``should_skip``) or the file on disk already has identical content.
    """
    target = cask.dest_path()
    if should_skip(target):
        return False

    rendered = cask.cask_content()
    if os.path.exists(target):
        with open(target, "r") as existing:
            unchanged = existing.read() == rendered
        if unchanged:
            return False

    # Create the shard directory on first use.
    parent = os.path.dirname(target)
    if not os.path.exists(parent):
        os.makedirs(parent)

    with open(target, "w") as out:
        out.write(rendered)
    return True
def find_google_casks():
    """Collect existing casks that reference Google Fonts.

    Scans ``Casks/font/*/*.rb`` relative to the current working directory.

    Returns:
        A dict mapping cask token (file name without extension) to a dict with
        keys ``'path'``, ``'description'`` and ``'homepage'``; the latter two
        are ``None`` when the corresponding stanza is not found.
    """
    found = {}
    for cask_path in glob('Casks/font/**/*.rb'):
        file_name = os.path.basename(cask_path)
        with open(cask_path, "r") as cask_file:
            source = cask_file.read()

        # Skip "font-material-symbols" as it matches the url regex, but is not included in the Google Fonts repo
        if file_name == "font-material-symbols.rb":
            continue

        mentions_google = re.search(
            r"(github\.com\/google\/fonts|fonts\.google\.com|google\.com/fonts)", source
        )
        if mentions_google is None:
            continue

        homepage_match = re.search(r"homepage\s+([\"'])(.*)(\1)\s*", source)
        desc_match = re.search(r"desc\s+([\"'])(.*)(\1)\s*", source)

        # Group 2 is the text between the matching quotes.
        found[os.path.splitext(file_name)[0]] = {
            'path': cask_path,
            'description': desc_match[2] if desc_match else None,
            'homepage': homepage_match[2] if homepage_match else None,
        }
    return found
def find_family_folders(repo_dir):
    """List every entry directly inside the license sub-directories of the repo.

    Args:
        repo_dir: Path to the repository root.

    Returns:
        A flat list of paths matching ``<repo_dir>/<subdir>/*`` for the
        sub-directories ``apache``, ``ofl`` and ``ufl``, in that order.
    """
    SUBDIRS = ["apache", "ofl", "ufl"]
    # One flat list: the glob results of each sub-directory, concatenated.
    return [
        entry
        for subdir in SUBDIRS
        for entry in glob(os.path.join(repo_dir, subdir, "*"))
    ]
# https://www.geeksforgeeks.org/python-split-camelcase-string-to-individual-strings/
def camel_case_split(str):
    """Split a CamelCase string into its capitalised words.

    Args:
        str: Text to split (the parameter name shadows the builtin; it is kept
            for interface compatibility).

    Returns:
        A list of words, each starting with an uppercase letter. A run of
        uppercase letters stays together up to the start of the next
        capitalised word; characters before the first uppercase letter are
        not included.
    """
    word = re.compile(r"[A-Z](?:[a-z]+|[A-Z]*(?=[A-Z]|$))")
    return [match.group(0) for match in word.finditer(str)]
def derive_name(font_file):
    """Derive a spaced family name from a font file path.

    The file stem is truncated to the length of its parent directory's name,
    any trailing ``-suffix`` is removed, and the remainder is split on
    CamelCase boundaries and joined with spaces.

    Args:
        font_file: Path to a font file inside its family folder.

    Returns:
        The derived name, e.g. words separated by single spaces.
    """
    directory, file_name = os.path.split(font_file)
    family_dir = os.path.basename(directory)
    stem, _extension = os.path.splitext(file_name)
    # Keep only as many characters as the folder name has.
    truncated = stem[: len(family_dir)]
    without_suffix = re.sub(r"-\w+$", "", truncated)
    return " ".join(camel_case_split(without_suffix))
def derive_metadata(family_folder):
    """Create a metadata object based on the contents of the folder.

    Args:
        family_folder: Directory containing the family's ``.ttf`` files.

    Returns:
        A ``fonts_pb2.FamilyProto`` whose name is derived from one of the font
        files and whose ``fonts`` list holds one entry per ``.ttf`` file.

    Raises:
        IndexError: If the folder contains no ``.ttf`` files.
    """
    ttf_paths = glob(os.path.join(family_folder, "*.ttf"))
    family = fonts_pb2.FamilyProto()
    # grab the first font, arbitrarily
    family.name = derive_name(ttf_paths[0])
    entries = [
        fonts_pb2.FontProto(filename=os.path.basename(ttf_path))
        for ttf_path in ttf_paths
    ]
    family.fonts.extend(entries)
    return family
def derive_cask(family_folder, repo_dir):
    """Build a ``FontCask`` for a family folder by inspecting its files.

    Args:
        family_folder: Path to the family folder.
        repo_dir: Repository root; the cask folder is *family_folder* relative
            to it.

    Returns:
        A ``FontCask`` flagged as early access when the folder contains an
        ``EARLY_ACCESS.category`` file.
    """
    derived_meta = derive_metadata(family_folder)
    relative_folder = os.path.relpath(family_folder, start=repo_dir)
    marker = os.path.join(family_folder, "EARLY_ACCESS.category")
    return FontCask(relative_folder, derived_meta, early_access=os.path.exists(marker))
def _homepage_returns_404(url):
    """Return True when fetching *url* fails with an HTTP 404 response."""
    try:
        # Only the status matters; the context manager closes the response.
        with urllib.request.urlopen(url):
            return False
    except urllib.request.URLError as e:
        # Only HTTPError carries ``code``. A plain URLError (e.g. a DNS or
        # connection failure) has no such attribute, so read it defensively
        # instead of raising AttributeError.
        return getattr(e, "code", None) == 404


def run():
    """Generate, update and/or delete font casks from a google/fonts checkout.

    Reads ``sys.argv``: ``<path-to-repo> <mode>``. ``mode`` selects the work:
    ``add`` writes casks for new families, ``update`` rewrites casks that
    already exist, ``delete`` removes existing Google casks whose family is
    no longer in the repository; an empty string does all three. Exits with
    status 1 after printing usage when the argument count is wrong.
    """
    if len(sys.argv) != 3:
        print(
            """Usage: ./import_google_fonts <path-to-repo> <mode>
Download the or clone the repository from https://github.com/google/fonts, then provide the path to the script.
"""
        )
        sys.exit(1)
    repo_dir = sys.argv[1]
    mode = sys.argv[2]
    family_folders = find_family_folders(repo_dir)
    existing_casks = find_google_casks()
    added_casks = {}
    updated_casks = {}
    for family_folder in family_folders:
        meta_file = os.path.join(family_folder, "METADATA.pb")
        # check if the metadata file is present
        # https://github.com/google/fonts/issues/2512
        if os.path.exists(meta_file):
            try:
                cask = metadata_to_cask(meta_file, repo_dir)
            except text_format.ParseError:
                continue
        else:
            cask = derive_cask(family_folder, repo_dir)
        # Ek Mukta has been renamed to just Mukta but still exists.
        if cask.token() == 'font-ek-mukta':
            continue
        # Skip cask if already handled (i.e. if it exists in multiple license sub-directories).
        if cask.token() in added_casks or cask.token() in updated_casks:
            continue
        # Popping leaves only casks without a matching family in
        # ``existing_casks``; those are the deletion candidates below.
        existing_cask = existing_casks.pop(cask.token(), None)
        if existing_cask:
            cask.desc = existing_cask['description']
            # If font is unreleased, re-use previous homepage URL.
            if cask.homepage() != existing_cask['homepage']:
                if _homepage_returns_404(cask.homepage()):
                    cask.homepage_override = existing_cask['homepage']
            updated_casks[cask.token()] = cask
        else:
            # No specimen page yet: fall back to the source repository URL.
            if _homepage_returns_404(cask.homepage()):
                cask.homepage_override = cask.meta.source.repository_url
            added_casks[cask.token()] = cask
    changed_casks = {}
    print("Mode " + mode)
    if not mode or mode == 'add':
        print("Adding added casks")
        changed_casks |= added_casks
    if not mode or mode == 'update':
        print("Adding updated casks")
        changed_casks |= updated_casks
    written_casks = 0
    for cask in changed_casks.values():
        if write_cask(cask):
            written_casks += 1
        # Limit cask changes per PR.
        # ``mode`` always comes from argv, so this cap always applies.
        if mode is not None and written_casks >= 50:
            break
    if not mode or mode == 'delete':
        # Delete casks which don't exist anymore.
        for deleted_cask in existing_casks.values():
            os.remove(deleted_cask["path"])
# Run only when executed as a script, so importing this file has no side effects.
if __name__ == "__main__":
    run()