amp/src/main/java/org/alfresco/extension/bulkimport/source/fs/DirectoryAnalyser.java
/*
* Copyright (C) 2007 Peter Monks
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of an unsupported extension to Alfresco.
*
*/
package org.alfresco.extension.bulkimport.source.fs;
import java.io.File;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.alfresco.repo.content.ContentStore;
import org.alfresco.service.ServiceRegistry;
import org.alfresco.util.Pair;
import org.alfresco.extension.bulkimport.source.BulkImportSourceStatus;
import static org.alfresco.extension.bulkimport.util.LogUtils.*;
import static org.alfresco.extension.bulkimport.source.fs.FilesystemSourceUtils.*;
/**
* This interface defines a directory analyser. This is the process by which
* the contents of a source directory are grouped together into a list of
* <code>FilesystemBulkImportItem</code>s.
*
* @author Peter Monks (pmonks@gmail.com)
*/
public final class DirectoryAnalyser
{
private final static Log log = LogFactory.getLog(DirectoryAnalyser.class);
// Status counters
private final static String COUNTER_NAME_FILES_SCANNED = "Files scanned";
private final static String COUNTER_NAME_DIRECTORIES_SCANNED = "Directories scanned";
private final static String COUNTER_NAME_UNREADABLE_ENTRIES = "Unreadable entries";
private final static String[] COUNTER_NAMES = { COUNTER_NAME_FILES_SCANNED,
COUNTER_NAME_DIRECTORIES_SCANNED,
COUNTER_NAME_UNREADABLE_ENTRIES };
private final ServiceRegistry serviceRegistry;
private final ContentStore configuredContentStore;
private final MetadataLoader metadataLoader;
private BulkImportSourceStatus importStatus;
public DirectoryAnalyser(final ServiceRegistry serviceRegistry,
final ContentStore configuredContentStore,
final MetadataLoader metadataLoader)
{
// PRECONDITIONS
assert serviceRegistry != null : "serviceRegistry must not be null.";
assert configuredContentStore != null : "configuredContentStore must not be null.";
assert metadataLoader != null : "metadataLoader must not be null.";
assert importStatus != null : "importStatus must not be null.";
// Body
this.serviceRegistry = serviceRegistry;
this.configuredContentStore = configuredContentStore;
this.metadataLoader = metadataLoader;
}
public void init(final BulkImportSourceStatus importStatus)
{
this.importStatus = importStatus;
importStatus.preregisterSourceCounters(COUNTER_NAMES);
}
/**
* Analyses the given directory.
*
* @param sourceDirectory The source directory for the entire import (note: <u>must</u> be a directory) <i>(must not be null)</i>.
* @param directory The directory to analyse (note: <u>must</u> be a directory) <i>(must not be null)</i>.
* @return An <code>AnalysedDirectory</code> object <i>(will not be null)</i>.
* @throws InterruptedException If the thread executing the method is interrupted.
*/
public Pair<List<FilesystemBulkImportItem>, List<FilesystemBulkImportItem>> analyseDirectory(final File sourceDirectory, final File directory)
throws InterruptedException
{
// PRECONDITIONS
if (sourceDirectory == null) throw new IllegalArgumentException("sourceDirectory cannot be null.");
if (directory == null) throw new IllegalArgumentException("directory cannot be null.");
// Body
if (debug(log)) debug(log, "Analysing directory " + getFileName(directory) + "...");
Pair<List<FilesystemBulkImportItem>, List<FilesystemBulkImportItem>> result = null;
File[] directoryListing = null;
long analysisStart = 0L;
long analysisEnd = 0L;
long start = 0L;
long end = 0L;
String sourceRelativeParentDirectory = sourceDirectory.toPath().relativize(directory.toPath()).toString(); // Note: JDK 1.7 specific
// List the directory
start = System.nanoTime();
analysisStart = start;
directoryListing = directory.listFiles();
end = System.nanoTime();
if (trace(log)) trace(log, "List directory (" + directoryListing.length + " entries) took: " + (float)(end - start) / (1000 * 1000 * 1000) + "s.");
// Build up the list of items from the directory listing
start = System.nanoTime();
result = analyseDirectory(sourceRelativeParentDirectory, directoryListing);
end = System.nanoTime();
if (trace(log)) trace(log, "Convert directory listing to set of filesystem import items took: " + (float)(end - start) / (1000 * 1000 * 1000) + "s.");
analysisEnd = end;
if (debug(log)) debug(log, "Finished analysing directory " + getFileName(directory) + ", in " + (float)(analysisEnd - analysisStart) / (1000 * 1000 * 1000) + "s.");
return(result);
}
private Pair<List<FilesystemBulkImportItem>, List<FilesystemBulkImportItem>> analyseDirectory(final String sourceRelativeParentDirectory, final File[] directoryListing)
throws InterruptedException
{
Pair<List<FilesystemBulkImportItem>, List<FilesystemBulkImportItem>> result = null;
if (directoryListing != null)
{
// This needs some Clojure, desperately...
Map<String, SortedMap<BigDecimal, Pair<File, File>>> categorisedFiles = categoriseFiles(directoryListing);
if (debug(log)) debug(log, "Categorised files: " + String.valueOf(categorisedFiles));
result = constructImportItems(sourceRelativeParentDirectory, categorisedFiles);
}
return(result);
}
private Map<String, SortedMap<BigDecimal, Pair<File, File>>> categoriseFiles(final File[] directoryListing)
throws InterruptedException
{
Map<String, SortedMap<BigDecimal, Pair<File, File>>> result = null;
if (directoryListing != null)
{
result = new HashMap<>();
for (final File file : directoryListing)
{
if (importStatus.isStopping() || Thread.currentThread().isInterrupted()) throw new InterruptedException(Thread.currentThread().getName() + " was interrupted. Terminating early.");
categoriseFile(result, file);
}
}
return(result);
}
/*
* This method does the hard work of figuring out where the file belongs (which parent item, and where in that item's
* version history).
*/
private void categoriseFile(final Map<String, SortedMap<BigDecimal, Pair<File, File>>> categorisedFiles, final File file)
{
if (file != null)
{
if (file.canRead())
{
final String fileName = file.getName();
final String parentName = getParentName(metadataLoader, fileName);
final boolean isMetadata = isMetadataFile(metadataLoader, fileName);
final BigDecimal versionNumber = getVersionNumber(fileName);
SortedMap<BigDecimal, Pair<File, File>> versions = categorisedFiles.get(parentName);
// Find the item
if (versions == null)
{
versions = new TreeMap<>();
categorisedFiles.put(parentName, versions);
}
// Find the version within the item
Pair<File, File> version = versions.get(versionNumber);
if (version == null)
{
version = new Pair<>(null, null);
}
// Categorise the incoming file in that version of the item
if (isMetadata)
{
version = new Pair<>(version.getFirst(), file);
}
else
{
version = new Pair<>(file, version.getSecond());
}
versions.put(versionNumber, version);
if (file.isDirectory())
{
importStatus.incrementSourceCounter(COUNTER_NAME_DIRECTORIES_SCANNED);
}
else
{
importStatus.incrementSourceCounter(COUNTER_NAME_FILES_SCANNED);
}
}
else
{
if (warn(log)) warn(log, "Skipping '" + getFileName(file) + "' as Alfresco does not have permission to read it.");
importStatus.incrementSourceCounter(COUNTER_NAME_UNREADABLE_ENTRIES);
}
}
}
private Pair<List<FilesystemBulkImportItem>, List<FilesystemBulkImportItem>> constructImportItems(final String sourceRelativeParentDirectory,
final Map<String, SortedMap<BigDecimal,Pair<File,File>>> categorisedFiles)
throws InterruptedException
{
Pair<List<FilesystemBulkImportItem>, List<FilesystemBulkImportItem>> result = null;
if (categorisedFiles != null)
{
final List<FilesystemBulkImportItem> directoryItems = new ArrayList<>();
final List<FilesystemBulkImportItem> fileItems = new ArrayList<>();
result = new Pair<>(directoryItems, fileItems);
for (final String parentName : categorisedFiles.keySet())
{
if (importStatus.isStopping() || Thread.currentThread().isInterrupted()) throw new InterruptedException(Thread.currentThread().getName() + " was interrupted. Terminating early.");
final SortedMap<BigDecimal,Pair<File,File>> itemVersions = categorisedFiles.get(parentName);
final NavigableSet<FilesystemBulkImportItemVersion> versions = constructImportItemVersions(itemVersions);
final boolean isDirectory = versions.last().isDirectory();
final FilesystemBulkImportItem item = new FilesystemBulkImportItem(parentName,
isDirectory,
sourceRelativeParentDirectory,
versions);
if (isDirectory)
{
directoryItems.add(item);
}
else
{
fileItems.add(item);
}
}
}
return(result);
}
private final NavigableSet<FilesystemBulkImportItemVersion> constructImportItemVersions(final SortedMap<BigDecimal,Pair<File,File>> itemVersions)
throws InterruptedException
{
// PRECONDITIONS
if (itemVersions == null) throw new IllegalArgumentException("itemVersions cannot be null.");
if (itemVersions.size() <= 0) throw new IllegalArgumentException("itemVersions cannot be empty.");
// Body
final NavigableSet<FilesystemBulkImportItemVersion> result = new TreeSet<>();
for (final BigDecimal versionNumber : itemVersions.keySet())
{
if (importStatus.isStopping() || Thread.currentThread().isInterrupted()) throw new InterruptedException(Thread.currentThread().getName() + " was interrupted. Terminating early.");
final Pair<File,File> contentAndMetadataFiles = itemVersions.get(versionNumber);
final FilesystemBulkImportItemVersion version = new FilesystemBulkImportItemVersion(serviceRegistry,
configuredContentStore,
metadataLoader,
versionNumber,
contentAndMetadataFiles.getFirst(),
contentAndMetadataFiles.getSecond());
result.add(version);
}
return(result);
}
}