# pycran/__init__.py
"""Parse CRAN package metadata"""
from typing import Dict, Generator, Optional, Set
from pycran.typings import BytesOrString, PathOrTarFile
from pycran.util import as_string, read_description
# Version string of this package.
__version__ = "0.2.0"
def parse(data: BytesOrString) -> Generator:
    """Parse CRAN package metadata and yield one dictionary per package.

    The expected input is the text found at
    https://cran.r-project.org/src/contrib/PACKAGES, i.e. ``Field: value``
    lines where a long value may spill over onto the following lines.

    Note: blank lines are ignored, and spilled-over lines are stripped and
    joined to the preceding value with a single space.

    A new package record starts when a field name that was already seen
    in the current record shows up again; blank lines are NOT used as
    record separators.

    Args:
        data (BytesOrString): raw text from the package list

    Returns:
        (Generator): each entry from packages as dictionary
    """
    seen: Set = set()
    package: Dict = {}
    # Name of the field stored most recently in `package`; dangling
    # lines are appended to its value. None until a first field is read.
    last_field: Optional[str] = None

    for raw_line in data.splitlines():
        line = as_string(raw_line)
        stripped = line.strip()
        if not stripped:
            continue

        name, separator, remainder = line.partition(":")
        field = name.strip()

        # A line is a dangling continuation (e.g. the tail of a long
        # dependency list, `R (>= 2.15.0), xtable, pbapply ... \n and more`)
        # when it has no colon at all or when the text before the first
        # colon does not start with a letter. `field[:1]` is used rather
        # than `field[0]` so that a line with nothing before the colon
        # (e.g. ": foo") is handled here instead of raising IndexError.
        # NOTE(review): a continuation of the form "word: text" still looks
        # like a field to this check and is stored as one.
        if not separator or not field[:1].isalpha():
            if last_field is not None:
                package[last_field] += f" {stripped}"
            continue

        # Seeing the same field again means the previous package record
        # is complete: hand it out and start collecting the next one.
        if field in seen:
            yield package
            package = {}
            seen = set()

        package[field] = remainder.strip()
        seen.add(field)
        last_field = field

    # The last package has no following record to trigger the yield
    # above, so it is returned here.
    if package:
        yield package
def encode(metadata: Dict) -> Optional[str]:
    """Serialize a metadata dictionary as ``key: value`` lines.

    Every entry becomes one line, and the lines are joined with a
    newline in the dictionary's iteration order, for example:

        Package: A3
        Version: 1.0.0
        Depends: R (>= 2.15.0), xtable, pbapply
        Suggests: randomForest, e1071
        License: GPL (>= 2)
        MD5sum: 027ebdd8affce8f0effaecfcd5f5ade2
        NeedsCompilation: no

    Args:
        metadata (Dict): package metadata to serialize

    Returns:
        (Optional[str]): the package record as text; an empty
            dictionary gives an empty string
    """
    lines = []
    for field, content in metadata.items():
        lines.append(f"{field}: {content}")
    return "\n".join(lines)
def decode(metadata: BytesOrString) -> Optional[Dict]:
    """Parse package metadata and return the first package record.

    Note: this is a shorthand for `parse` that takes only the first
    value it produces.

    Input should be in the following format, which is the R package
    metadata description,
    see: https://cran.r-project.org/src/contrib/PACKAGES

        Package: A3
        Version: 1.0.0
        Depends: R (>= 2.15.0), xtable, pbapply
        Suggests: randomForest, e1071
        License: GPL (>= 2)
        MD5sum: 027ebdd8affce8f0effaecfcd5f5ade2
        NeedsCompilation: no

    Args:
        metadata (BytesOrString): metadata text information

    Returns:
        (Optional[Dict]): the first parsed package as a dictionary, or
            None when the input holds no package or when parsing raises
            ValueError or TypeError
    """
    try:
        # `parse` is a generator, so asking only for its first item avoids
        # parsing (and keeping in memory) any records that follow it.
        return next(parse(metadata), None)
    except (ValueError, TypeError):
        return None
def from_file(archive: PathOrTarFile) -> Optional[Dict]:
    """Load and parse the metadata of a CRAN package archive.

    The archive is handed to `read_description` and whatever it returns
    is decoded with `decode`.

    Args:
        archive (PathOrTarFile): path to archive or `tarfile.TarFile` instance

    Returns:
        (Optional[Dict]): dictionary of R package metadata, or None when
            `decode` cannot produce a package
    """
    # presumably the text of the package's DESCRIPTION file -- confirm
    # against `pycran.util.read_description`
    description = read_description(archive)
    return decode(description)