amp/src/main/java/org/alfresco/extension/bulkimport/source/fs/FilesystemSourceUtils.java
/*
* Copyright (C) 2007 Peter Monks
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of an unsupported extension to Alfresco.
*
*/
package org.alfresco.extension.bulkimport.source.fs;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.alfresco.repo.content.ContentStore;
import org.alfresco.repo.content.encoding.ContentCharsetFinder;
import org.alfresco.repo.content.filestore.FileContentStore;
import org.alfresco.service.cmr.repository.ContentData;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.extension.bulkimport.source.BulkImportItemVersion;
/**
* This class is a miscellaneous grab bag of filesystem methods that are
* intended to be statically imported.
*
* @author Peter Monks (pmonks@gmail.com)
*
*/
public final class FilesystemSourceUtils
{
    private final static String DEFAULT_TEXT_ENCODING  = "UTF-8";
    private final static int    MAX_CONTENT_URL_LENGTH = 255;

    // Regexes for matching version files
    public  final static String  VERSION_LABEL_REGEX      = "([\\d]+)(\\.([\\d]+))?";                    // Group 0 = version label, Group 1 = major version #, group 3 (if not null) = minor version #
    private final static String  VERSION_SUFFIX_REGEX     = "\\.v(" + VERSION_LABEL_REGEX + ")\\z";      // Note: group numbers are one greater than shown above
    private final static String  VERSION_FILENAME_REGEX   = ".+" + VERSION_SUFFIX_REGEX;
    private final static Pattern VERSION_SUFFIX_PATTERN   = Pattern.compile(VERSION_SUFFIX_REGEX);       // Compiled once, rather than on every call to stripVersionSuffix
    private final static Pattern VERSION_FILENAME_PATTERN = Pattern.compile(VERSION_FILENAME_REGEX);


    /**
     * Returns true if the suspectedChild is within the given directory.  The
     * String version is preferred over this one, as it makes the normalisation
     * of the two paths explicit.
     *
     * @param directory      The directory in which to check <i>(may be null, although doing so will always return false)</i>.
     * @param suspectedChild The suspect child to check for <i>(may be null, although doing so will always return false)</i>.
     * @return true if and only if suspectedChild is "within" directory.  Note that this comparison is done solely at a "path string"
     *         level.  It will attempt to remove relative path elements (".." especially) to avoid incorrect results, but YMMV.
     */
    public final static boolean isInDirectory(final File directory, final File suspectedChild)
    {
        boolean result = false;

        // Honour the documented contract: a null argument yields false rather than a NullPointerException
        if (directory != null && suspectedChild != null)
        {
            result = isInDirectory(directory.getAbsolutePath(), suspectedChild.getAbsolutePath());
        }

        return(result);
    }


    /**
     * Returns true if the suspectedChild is within the given directory.  This
     * method is preferred over the File version, as it forces normalisation
     * of the two paths (see {@link FilenameUtils#normalize(String, boolean)})
     * first.
     *
     * @param directoryPath      The directory in which to check <i>(may be null, although doing so will always return false)</i>.
     * @param suspectedChildPath The suspect child to check for <i>(may be null, although doing so will always return false)</i>.
     * @return true if and only if suspectedChild is "within" directory.  Note that this comparison is done solely at a "path string"
     *         level.  Paths that cannot be normalised (e.g. ones that use ".." to climb above their root) always return false.
     */
    public final static boolean isInDirectory(final String directoryPath, final String suspectedChildPath)
    {
        boolean result = false;

        final String normalisedDirectoryPath = FilenameUtils.normalize(directoryPath,      true);
        final String normalisedChildPath     = FilenameUtils.normalize(suspectedChildPath, true);

        // normalize() returns null for null or invalid input, and new File((String)null) would throw
        if (normalisedDirectoryPath != null && normalisedChildPath != null)
        {
            result = isInDirectoryImpl(new File(normalisedDirectoryPath), new File(normalisedChildPath));
        }

        return(result);
    }


    /**
     * Walks up the ancestry of suspectedChild (starting with suspectedChild itself)
     * looking for a path equal to directory.
     *
     * @param directory      The directory being searched for <i>(may be null)</i>.
     * @param suspectedChild The path whose ancestry is walked <i>(may be null)</i>.
     * @return true if suspectedChild, or one of its ancestors, equals directory.
     */
    private final static boolean isInDirectoryImpl(final File directory, final File suspectedChild)
    {
        boolean result = false;

        if (directory != null)
        {
            File current = suspectedChild;

            // Iterative, so that very deep paths cannot exhaust the stack
            while (!result && current != null)
            {
                result  = current.equals(directory);
                current = current.getParentFile();
            }
        }

        return(result);
    }


    /**
     * Determines whether the given file is already located in an Alfresco managed content store.  Used to determine
     * whether to perform a streaming or in-place import.
     *
     * @param contentStore The content store Alfresco is configured to use <i>(must not be null)</i>.
     * @param source       The file to test.  Typically this would be the source directory for the import <i>(must not be null)</i>.
     * @return True if the given file is in an Alfresco managed content store, false otherwise.
     */
    public final static boolean isInContentStore(final ContentStore contentStore, final File source)
    {
        boolean      result           = false;
        final String contentStoreRoot = contentStore.getRootLocation();

        if (contentStoreRoot != null && contentStoreRoot.trim().length() > 0)
        {
            final File contentStoreRootFile = new File(contentStoreRoot);

            // If the content store root doesn't exist as a directory, we're probably dealing with a non-filesystem content store
            if (contentStoreRootFile.exists() && contentStoreRootFile.isDirectory())
            {
                result = isInDirectory(contentStoreRoot, source.getAbsolutePath());
            }
        }

        return(result);
    }


    /**
     * @param file The file to get the name of <i>(may be null)</i>.
     * @return A human readable rendition of the file - currently its absolute path <i>(null when file is null)</i>.
     */
    public final static String getFileName(final File file)
    {
        String result = null;

        if (file != null)
        {
            result = file.getAbsolutePath();
        }

        return(result);
    }


    /**
     * This method does the magic of constructing the content URL for
     * "in-place" content.
     *
     * @param mimeTypeService The Alfresco MimetypeService <i>(must not be null)</i>.
     * @param contentStore    The content store Alfresco is configured to use <i>(must not be null)</i>.
     * @param contentFile     The content file to build a content URL for <i>(must not be null)</i>.
     * @return The constructed <code>ContentData</code>, or null if the contentFile cannot be in-place imported for any reason
     *         (the paths cannot be normalised, the content store has no usable root, the file is outside the content
     *         store root, or the resulting content URL would be too long).
     */
    public final static ContentData buildContentProperty(final MimetypeService mimeTypeService, final ContentStore contentStore, final File contentFile)
    {
        ContentData result = null;

        final String normalisedFilename         = FilenameUtils.normalize(contentFile.getAbsolutePath(),  true);
        String       normalisedContentStoreRoot = FilenameUtils.normalize(contentStore.getRootLocation(), true);

        // normalize() returns null for null or invalid paths.  An empty normalised root (e.g. a root of ".")
        // must also be rejected - otherwise it would become "/" below and "contain" every absolute path.
        if (normalisedFilename         != null &&
            normalisedContentStoreRoot != null &&
            normalisedContentStoreRoot.length() > 0)
        {
            // Ensure content store root ends with a single / character
            if (!normalisedContentStoreRoot.endsWith("/"))
            {
                normalisedContentStoreRoot = normalisedContentStoreRoot + "/";
            }

            // If, after normalisation, the filename doesn't start with the content store root, we can't in-place import
            if (normalisedFilename.startsWith(normalisedContentStoreRoot))
            {
                final String contentStoreRelativeFilename = normalisedFilename.substring(normalisedContentStoreRoot.length());
                final String contentUrl                   = FileContentStore.STORE_PROTOCOL + ContentStore.PROTOCOL_DELIMITER + contentStoreRelativeFilename;

                // If the resulting content URL would be too long, we can't in-place import
                if (contentUrl.length() <= MAX_CONTENT_URL_LENGTH)
                {
                    final String mimeType = mimeTypeService.guessMimetype(contentFile.getName());
                    final String encoding = guessEncoding(mimeTypeService, contentFile, mimeType);

                    result = new ContentData(contentUrl, mimeType, contentFile.length(), encoding);
                }
            }
        }

        return(result);
    }


    /**
     * Attempt to guess the encoding of a text file, falling back to {@link #DEFAULT_TEXT_ENCODING}.
     * Files whose MIME type is not a text type are not read, and are always reported as the default encoding.
     *
     * @param mimeTypeService The Alfresco MimetypeService <i>(must not be null)</i>.
     * @param file            The {@link java.io.File} to test <i>(must not be null)</i>.
     * @param mimeType        The file MIME type.  Used to first distinguish between binary and text files <i>(must not be null)</i>.
     * @return The text encoding as a {@link String}.
     */
    public final static String guessEncoding(final MimetypeService mimeTypeService, final File file, final String mimeType)
    {
        String result = DEFAULT_TEXT_ENCODING;

        if (mimeTypeService.isText(mimeType))
        {
            // Only needed for text content, so only looked up here
            final ContentCharsetFinder charsetFinder = mimeTypeService.getContentCharsetFinder();
            InputStream                is            = null;

            try
            {
                is     = new BufferedInputStream(new FileInputStream(file));
                result = charsetFinder.getCharset(is, mimeType).name();
            }
            catch (final IOException ioe)
            {
                // Best effort only: if the file can't be read, fall back to the default encoding
                result = DEFAULT_TEXT_ENCODING;
            }
            finally
            {
                IOUtils.closeQuietly(is);
            }
        }

        return(result);
    }


    /**
     * Strips the version suffix (if any) from a filename.
     *
     * @param fileName The filename to strip the version suffix from <i>(must not be null, empty or blank)</i>.
     * @return The filename with the version suffix (if any) stripped.
     */
    public static String stripVersionSuffix(final String fileName)
    {
        String result = fileName;

        if (isVersionFile(result))
        {
            result = VERSION_SUFFIX_PATTERN.matcher(result).replaceFirst("");
        }

        return(result);
    }


    /**
     * Get the name of the parent file for this file, i.e. the filename with any
     * version suffix and any metadata suffix removed.
     *
     * @param metadataLoader The configured <code>MetadataLoader</code> <i>(should not be null; if it is, only the version suffix is stripped)</i>.
     * @param fileName       The filename to check <i>(must not be null, empty or blank)</i>.
     * @return The name of the parent file of this file.
     */
    public static String getParentName(final MetadataLoader metadataLoader, final String fileName)
    {
        String result = stripVersionSuffix(fileName);

        if (metadataLoader != null)
        {
            final String metadataSuffix = MetadataLoader.METADATA_SUFFIX + metadataLoader.getMetadataFileExtension();

            // Test the very string that is about to be trimmed, so that the substring below
            // can only ever remove the metadata suffix itself
            if (result.endsWith(metadataSuffix))
            {
                result = result.substring(0, result.length() - metadataSuffix.length());
            }
        }

        return(result);
    }


    /**
     * @param fileName The filename to check <i>(must not be null, empty or blank)</i>.
     * @return True if the given filename represents a version file, false otherwise.
     */
    public static boolean isVersionFile(final String fileName)
    {
        final Matcher matcher = VERSION_FILENAME_PATTERN.matcher(fileName);

        return(matcher.matches());
    }


    /**
     * @param metadataLoader The configured <code>MetadataLoader</code> <i>(may be null, although doing so will always return false)</i>.
     * @param fileName       The filename to check <i>(must not be null, empty or blank)</i>.
     * @return True if the given filename (ignoring any version suffix) represents a metadata file, false otherwise.
     */
    public static boolean isMetadataFile(final MetadataLoader metadataLoader, final String fileName)
    {
        boolean result = false;

        if (metadataLoader != null)
        {
            final String tmpFileName = stripVersionSuffix(fileName);

            result = tmpFileName.endsWith(MetadataLoader.METADATA_SUFFIX + metadataLoader.getMetadataFileExtension());
        }

        return(result);
    }


    /**
     * @param fileName The filename to check <i>(may be null)</i>.
     * @return The version number for the given filename, <code>BulkImportItemVersion.VERSION_HEAD</code> if it
     *         doesn't have one, or null if fileName is null.
     */
    public static BigDecimal getVersionNumber(final String fileName)
    {
        BigDecimal result = null;

        if (fileName != null)
        {
            final Matcher m = VERSION_FILENAME_PATTERN.matcher(fileName);

            if (m.matches())
            {
                result = new BigDecimal(m.group(1));  // Group 1 = version label, including full stop separator for decimal version numbers
            }
            else
            {
                result = BulkImportItemVersion.VERSION_HEAD;  // Filename doesn't include a version label, so its version is HEAD
            }
        }

        return(result);
    }
}