pmonks/alfresco-bulk-import

View on GitHub
amp/src/main/java/org/alfresco/extension/bulkimport/source/fs/FilesystemSourceUtils.java

Summary

Maintainability
A
25 mins
Test Coverage
/*
 * Copyright (C) 2007 Peter Monks
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * This file is part of an unsupported extension to Alfresco.
 * 
 */

package org.alfresco.extension.bulkimport.source.fs;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.alfresco.repo.content.ContentStore;
import org.alfresco.repo.content.encoding.ContentCharsetFinder;
import org.alfresco.repo.content.filestore.FileContentStore;
import org.alfresco.service.cmr.repository.ContentData;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.extension.bulkimport.source.BulkImportItemVersion;


/**
 * This class is a miscellaneous grab bag of filesystem methods that are
 * intended to be statically imported.
 *
 * @author Peter Monks (pmonks@gmail.com)
 *
 */
public final class FilesystemSourceUtils
{
    // Encoding name returned by guessEncoding() when the content is not text,
    // or when the file cannot be opened/read while detecting its charset.
    private final static String DEFAULT_TEXT_ENCODING  = "UTF-8";

    // Longest content URL (in characters) that buildContentProperty() will accept;
    // anything longer is reported as "cannot be in-place imported".
    // NOTE(review): presumably mirrors a repository-side limit on content URLs - confirm.
    private final static int    MAX_CONTENT_URL_LENGTH = 255;
    
    // Regexes for matching version files

    // Matches a version label of the form "<major>" or "<major>.<minor>".
    // Group 0 = whole version label, group 1 = major version #,
    // group 2 = "." + minor version # (optional), group 3 (if not null) = minor version #.
    public  final static String  VERSION_LABEL_REGEX      = "([\\d]+)(\\.([\\d]+))?";

    // Matches the literal ".v" followed by a version label, anchored to the very end of the input (\z).
    // The label is wrapped in an extra capturing group, so the group numbers are one greater than
    // in VERSION_LABEL_REGEX: group 1 = whole version label, group 2 = major, group 4 (if not null) = minor.
    private final static String  VERSION_SUFFIX_REGEX     = "\\.v(" + VERSION_LABEL_REGEX + ")\\z";

    // A version filename is at least one character followed by a version suffix, so a name
    // that consists solely of a suffix (e.g. ".v1") is not treated as a version file.
    private final static String  VERSION_FILENAME_REGEX   = ".+" + VERSION_SUFFIX_REGEX;

    // Compiled once and shared by isVersionFile() and getVersionNumber().
    private final static Pattern VERSION_FILENAME_PATTERN = Pattern.compile(VERSION_FILENAME_REGEX);
    
    /**
     * Returns true if the suspectedChild is within the given directory.  This
     * overload converts both files to their absolute paths and delegates to the
     * String version, which normalises the two paths before comparing them.
     * 
     * @param directory      The directory in which to check <i>(may be null, although doing so will always return false)</i>.
     * @param suspectedChild The suspect child to check for <i>(may be null, although doing so will always return false)</i>.
     * @return true if and only if suspectedChild is "within" directory.  Note that this comparison is done solely at a "path string"
     *         level.  It will attempt to remove relative path elements (".." especially) to avoid incorrect results, but YMMV.
     */
    public final static boolean isInDirectory(final File directory, final File suspectedChild)
    {
        boolean result = false;
        
        // Honour the documented contract: a null argument yields false rather than a NullPointerException
        if (directory != null && suspectedChild != null)
        {
            result = isInDirectory(directory.getAbsolutePath(), suspectedChild.getAbsolutePath());
        }
        
        return(result);
    }
    

    /**
     * Returns true if the suspectedChild is within the given directory.  This
     * method is preferred over the File version, as it forces normalisation
     * of the two paths
     * (see <a href="http://commons.apache.org/proper/commons-io/javadocs/api-2.4/org/apache/commons/io/FilenameUtils.html#normalize(java.lang.String, boolean)">this reference</a>)
     * first.
     * 
     * @param directoryPath      The directory in which to check <i>(may be null, although doing so will always return false)</i>.
     * @param suspectedChildPath The suspect child to check for <i>(may be null, although doing so will always return false)</i>.
     * @return true if and only if suspectedChild is "within" directory (a path is considered to be within itself).  Note that this
     *         comparison is done solely at a "path string" level.  Paths that cannot be normalised always produce false.
     */
    public final static boolean isInDirectory(final String directoryPath, final String suspectedChildPath)
    {
        boolean result = false;
        
        // FilenameUtils.normalize() returns null for null input and for paths it cannot normalise
        // (e.g. ".." elements that climb above the root).  new File((String)null) would throw a
        // NullPointerException, so such paths are treated as "not in the directory" instead.
        final String normalisedDirectoryPath      = FilenameUtils.normalize(directoryPath,      true);
        final String normalisedSuspectedChildPath = FilenameUtils.normalize(suspectedChildPath, true);
        
        if (normalisedDirectoryPath != null && normalisedSuspectedChildPath != null)
        {
            result = isInDirectoryImpl(new File(normalisedDirectoryPath),
                                       new File(normalisedSuspectedChildPath));
        }
        
        return(result);
    }
    
    
    /**
     * Walks up the ancestry of suspectedChild (starting with suspectedChild itself), looking
     * for a path element that is equal to directory.
     * 
     * @param directory      The directory to look for <i>(may be null, in which case false is returned)</i>.
     * @param suspectedChild The file whose ancestry is searched <i>(may be null, in which case false is returned)</i>.
     * @return true if suspectedChild, or any of its ancestors, equals directory.
     */
    private final static boolean isInDirectoryImpl(final File directory, final File suspectedChild)
    {
        boolean result = false;
        
        if (directory != null)
        {
            File candidate = suspectedChild;
            
            // getParentFile() returns null once the top of the path has been reached
            while (!result && candidate != null)
            {
                result    = candidate.equals(directory);
                candidate = candidate.getParentFile();
            }
        }
        
        return(result);
    }
 

    /**
     * Tests whether the given file lives inside the root directory of the given content store.  Used to
     * decide between a streaming import and an in-place import.
     * 
     * @param contentStore The content store Alfresco is configured to use <i>(must not be null)</i>.
     * @param source The file to test.  Typically this would be the source directory for the import <i>(must not be null)</i>.
     * @return True if the content store reports a non-blank root location that exists as a directory on the
     *         filesystem and source is within it, false otherwise.
     */
    public final static boolean isInContentStore(final ContentStore contentStore, final File source)
    {
        final String rootLocation = contentStore.getRootLocation();
        
        // A missing or blank root location can never contain the source
        if (rootLocation == null || rootLocation.trim().length() == 0)
        {
            return(false);
        }
        
        final File rootDirectory = new File(rootLocation);
        
        // A root that isn't an existing directory most likely belongs to a non-filesystem content store
        if (!rootDirectory.exists() || !rootDirectory.isDirectory())
        {
            return(false);
        }
        
        return(isInDirectory(rootLocation, source.getAbsolutePath()));
    }
    
    
    /**
     * Produces a human readable name for a file, which is simply its absolute path.
     * 
     * @param file The file to get the name of <i>(may be null)</i>.
     * @return The absolute path of the file <i>(null when file is null)</i>.
     */
    public final static String getFileName(final File file)
    {
        return(file == null ? null : file.getAbsolutePath());
    }

    
    /**
     * This method does the magic of constructing the content URL for
     * "in-place" content.
     * <p>
     * The content URL is built from the path of the content file relative to the
     * content store's root location, after both paths have been normalised.
     * 
     * @param mimeTypeService The Alfresco MimetypeService <i>(must not be null)</i>.
     * @param contentStore    The content store Alfresco is configured to use <i>(must not be null)</i>.
     * @param contentFile     The content file to build a content URL for <i>(must not be null)</i>.
     * @return The constructed <code>ContentData</code>, or null if the contentFile cannot be in-place imported for any reason
     *         (either path cannot be normalised, the content store has no usable root location, the file is not
     *         beneath the content store root, or the resulting content URL would be too long).
     */
    public final static ContentData buildContentProperty(final MimetypeService mimeTypeService, final ContentStore contentStore, final File contentFile)
    {
        ContentData result = null;
        
        // FilenameUtils.normalize() returns null for null input and for paths it cannot normalise
        final String normalisedFilename         = FilenameUtils.normalize(contentFile.getAbsolutePath(), true);
        String       normalisedContentStoreRoot = FilenameUtils.normalize(contentStore.getRootLocation(), true);
        
        // Without a usable filename and root we can't in-place import.  An empty root is rejected
        // because appending "/" to it below would make it match every absolute Unix path.
        if (normalisedFilename         != null &&
            normalisedContentStoreRoot != null &&
            normalisedContentStoreRoot.trim().length() > 0)
        {
            // Ensure content store root ends with a single / character
            if (!normalisedContentStoreRoot.endsWith("/"))
            {
                normalisedContentStoreRoot = normalisedContentStoreRoot + "/";
            }
            
            // If, after normalisation, the filename doesn't start with the content store root, we can't in-place import
            if (normalisedFilename.startsWith(normalisedContentStoreRoot))
            {
                final String contentStoreRelativeFilename = normalisedFilename.substring(normalisedContentStoreRoot.length());
                final String contentUrl                   = FileContentStore.STORE_PROTOCOL + ContentStore.PROTOCOL_DELIMITER + contentStoreRelativeFilename;
        
                // If the resulting content URL would be too long, we can't in-place import
                if (contentUrl.length() <= MAX_CONTENT_URL_LENGTH)
                {
                    final String mimeType = mimeTypeService.guessMimetype(contentFile.getName());
                    final String encoding = guessEncoding(mimeTypeService, contentFile, mimeType);
                    
                    result = new ContentData(contentUrl, mimeType, contentFile.length(), encoding);
                }
            }
        }
        
        return(result);
    }
    
    
    /**
     * Attempt to guess the encoding of a text file, falling back to {@link #DEFAULT_TEXT_ENCODING}.
     * <p>
     * The fallback is returned when the MIME type is not a text type, and when the
     * file cannot be opened or read while detecting its character set.
     *
     * @param mimeTypeService The Alfresco MimetypeService <i>(must not be null)</i>.
     * @param file            The {@link java.io.File} to test <i>(must not be null)</i>.
     * @param mimeType        The file MIME type. Used to first distinguish between binary and text files <i>(must not be null)</i>.
     * @return The text encoding as a {@link String}.
     */
    public final static String guessEncoding(final MimetypeService mimeTypeService, final File file, final String mimeType)
    {
        final ContentCharsetFinder finder = mimeTypeService.getContentCharsetFinder();

        // Only text content has its encoding sniffed; everything else gets the default
        if (!mimeTypeService.isText(mimeType))
        {
            return(DEFAULT_TEXT_ENCODING);
        }

        InputStream contentStream = null;

        try
        {
            contentStream = new BufferedInputStream(new FileInputStream(file));

            return(finder.getCharset(contentStream, mimeType).name());
        }
        catch (final IOException ioe)
        {
            // Deliberate best effort: an unreadable file simply gets the default encoding
            return(DEFAULT_TEXT_ENCODING);
        }
        finally
        {
            IOUtils.closeQuietly(contentStream);
        }
    }
    
    
    /**
     * Removes the trailing version suffix (".v" followed by a version label) from a
     * filename, if it has one.
     * 
     * @param fileName The filename to strip the version suffix from <i>(must not be null, empty or blank)</i>.
     * @return The filename without its version suffix, or the filename unchanged if it is not a version file.
     */
    public static String stripVersionSuffix(final String fileName)
    {
        // Filenames that aren't version files are handed back untouched
        if (!isVersionFile(fileName))
        {
            return(fileName);
        }
        
        return(fileName.replaceFirst(VERSION_SUFFIX_REGEX, ""));
    }

    
    /**
     * Get the name of the parent file for this file, i.e. the filename with any
     * version suffix, and then any metadata suffix and extension, removed.
     * 
     * @param metadataLoader The configured <code>MetadataLoader</code> <i>(must not be null)</i>.
     * @param fileName       The filename to check <i>(must not be null, empty or blank)</i>.
     * @return The name of the parent file of this file.
     */
    public static String getParentName(final MetadataLoader metadataLoader, final String fileName)
    {
        final String unversionedName = stripVersionSuffix(fileName);
        
        // Content files (as opposed to metadata files) only ever carry a version suffix
        if (!isMetadataFile(metadataLoader, unversionedName))
        {
            return(unversionedName);
        }
        
        final String metadataSuffix = MetadataLoader.METADATA_SUFFIX + metadataLoader.getMetadataFileExtension();
        
        return(unversionedName.substring(0, unversionedName.length() - metadataSuffix.length()));
    }
    
    
    /**
     * Tests whether a filename ends with a version suffix, i.e. whether the entire
     * filename matches the version filename pattern.
     * 
     * @param fileName The filename to check <i>(must not be null, empty or blank)</i>.
     * @return True if the given filename represents a version file, false otherwise.
     */
    public static boolean isVersionFile(final String fileName)
    {
        return(VERSION_FILENAME_PATTERN.matcher(fileName).matches());
    }
    

    /**
     * Tests whether a filename, once any version suffix has been removed, ends with the
     * metadata suffix followed by the metadata loader's file extension.
     * 
     * @param metadataLoader The configured <code>MetadataLoader</code> <i>(may be null, in which case false is returned)</i>.
     * @param fileName       The filename to check <i>(must not be null, empty or blank)</i>.
     * @return True if the given filename represents a metadata file, false otherwise.
     */
    public static boolean isMetadataFile(final MetadataLoader metadataLoader, final String fileName)
    {
        // Without a metadata loader there is no such thing as a metadata file
        if (metadataLoader == null)
        {
            return(false);
        }
        
        final String unversionedName = stripVersionSuffix(fileName);
        final String metadataSuffix  = MetadataLoader.METADATA_SUFFIX + metadataLoader.getMetadataFileExtension();

        return(unversionedName.endsWith(metadataSuffix));
    }
    
    
    /**
     * Extracts the version number encoded in a filename's version suffix.
     * 
     * @param fileName The filename to check <i>(may be null)</i>.
     * @return The version label for the given filename as a number, <code>BulkImportItemVersion.VERSION_HEAD</code>
     *         if it doesn't have one, or null if fileName is null.
     */
    public static BigDecimal getVersionNumber(final String fileName)
    {
        if (fileName == null)
        {
            return(null);
        }
        
        final Matcher versionMatcher = VERSION_FILENAME_PATTERN.matcher(fileName);
        
        if (versionMatcher.matches())
        {
            // Group 1 = version label, including full stop separator for decimal version numbers
            return(new BigDecimal(versionMatcher.group(1)));
        }
        
        // Filename doesn't include a version label, so its version is HEAD
        return(BulkImportItemVersion.VERSION_HEAD);
    }
    
}