Commit 01bd53fe authored by toaster's avatar toaster
Browse files

moved to in-memory collection comparison

git-svn-id: https://subversion.umiacs.umd.edu/ace/trunk@126 f1b3a171-7291-4a19-a512-95ad0ad9394a
parent fcdef57d
/*
* Copyright (c) 2007-2011, University of Maryland
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided
* that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this list of conditions
* and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions
* and the following disclaimer in the documentation and/or other materials provided with the
* distribution.
*
* Neither the name of the University of Maryland nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ACE Components were written in the ADAPT Project at the University of
* Maryland Institute for Advanced Computer Study.
*/
package edu.umiacs.ace.monitor.compare;
import edu.umiacs.ace.monitor.core.Collection;
import edu.umiacs.ace.monitor.core.MonitoredItem;
import edu.umiacs.ace.util.PersistUtil;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.persistence.EntityManager;
import javax.persistence.Query;
import org.apache.log4j.Logger;
/**
 * Compares a supplied digest list against the files registered in a
 * collection. The whole supplied list is held in memory (two hash maps), which
 * limits collections to a million items or so, but prevents db thrashing.
 *
 * The digest list is parsed once, in the constructor; {@link #compareTo} then
 * walks the collection's items and reports each finding to a
 * {@link CompareResults}.
 *
 * @author toaster
 */
public final class CollectionCompare2 {

    private static final Logger LOG = Logger.getLogger(CollectionCompare2.class);

    /** Supplied list indexed by path: path -> digest. */
    private final Map<String, String> sourceMap = new HashMap<String, String>();
    /**
     * Supplied list indexed by digest: digest -> path. When several paths
     * share a digest, the one read last replaces the earlier entries.
     */
    private final Map<String, String> sourceReverseMap = new HashMap<String, String>();
    /** One message per input line that could not be split into digest and path. */
    private final List<String> parseErrors = new ArrayList<String>();

    /**
     * Reads the complete digest list from the given stream. The stream is
     * closed once it has been read.
     *
     * @param sourceFile stream containing one "digest path" pair per line
     * @param prefix passed through to the parser, which currently ignores it
     * @throws RuntimeException wrapping the IOException if the stream cannot be read
     */
    public CollectionCompare2(InputStream sourceFile, String prefix) {
        try {
            parseInputStream(sourceFile, prefix);
        } catch (IOException e) {
            LOG.error("Error reading digest source", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * @return the live path -> digest map built from the supplied list
     */
    Map<String, String> getSourceMap() {
        return sourceMap;
    }

    /**
     * @return read-only view of the messages recorded for unparsable lines
     */
    public List<String> getParseErrors() {
        return Collections.unmodifiableList(parseErrors);
    }

    /**
     * Compares every item returned by the
     * "MonitoredItem.listFilesInCollection" query for the collection against
     * the supplied digest list and records the outcome in {@code cr}.
     *
     * Any exception raised while loading or comparing is logged and not
     * rethrown; {@code cr.finished()} is always called and the entity manager
     * is always closed.
     *
     * @param cr receives the findings
     * @param c collection whose items are compared
     * @param item not used by this implementation
     */
    public void compareTo(CompareResults cr, Collection c, MonitoredItem item) {
        EntityManager em = PersistUtil.getEntityManager();
        long time = System.currentTimeMillis();
        long total = 0;
        try {
            LOG.info("Starting collection compare on " + c.getName() + " source size: " + sourceMap.size());
            Query q = em.createNamedQuery("MonitoredItem.listFilesInCollection");
            q.setParameter("coll", c);
            List<?> items = q.getResultList();
            for (Object o : items) {
                total++;
                MonitoredItem aceItem = (MonitoredItem) o;
                String acePath = aceItem.getPath();
                String aceDigest = aceItem.getFileDigest();

                // Values in sourceMap are never null (they come from String.split),
                // so a null result means the path is absent from the supplied list.
                String sourceDigest = sourceMap.get(acePath);
                if (sourceDigest != null) {
                    cr.fileExistsAtTarget(acePath);
                    // Plain equality: String.matches would treat the digest as a
                    // regular expression and throws when the digest is null.
                    if (!sourceDigest.equals(aceDigest)) {
                        cr.mismatchedDigests(acePath, sourceDigest, aceDigest);
                    }
                } else if (sourceReverseMap.containsKey(aceDigest)) {
                    // Same digest is listed under a different path.
                    String sourcePath = sourceReverseMap.get(aceDigest);
                    cr.fileExistsAtTarget(sourcePath);
                    // NOTE(review): the collection's path is passed in the position
                    // CompareResults names "sourcename" and the supplied-list path in
                    // the position it names "targetName" - confirm this order is intended.
                    cr.mismatchedNames(aceDigest, acePath, sourcePath);
                } else {
                    cr.sourceFileNotFound(acePath);
                }
            }
        } catch (Exception e) {
            LOG.error("Error during load and compare: ", e);
        } finally {
            try {
                LOG.info("Finished collection compare on: "
                        + c.getName() + " time: " + (System.currentTimeMillis() - time) + " tested: " + total);
                cr.finished();
            } finally {
                // Release the entity manager even if the calls above fail.
                em.close();
            }
        }
    }

    /**
     * Fills the two lookup maps from the stream. A first line of the form
     * {@code NAME:value} (upper-case letters, digits and dashes before the
     * colon) is skipped. Every other line is split on its first run of
     * whitespace into digest and path; lines that do not yield exactly two
     * parts are logged and recorded in the parse errors.
     *
     * @param sourceFile stream to read; closed before this method returns
     * @param prefix currently unused
     * @throws IOException if reading from the stream fails
     */
    private void parseInputStream(InputStream sourceFile, String prefix) throws IOException {
        BufferedReader input = new BufferedReader(new InputStreamReader(sourceFile));
        try {
            String line = input.readLine();
            if (line != null && line.matches("^[A-Z0-9\\-]+:.+$")) {
                line = input.readLine();
            }
            while (line != null) {
                String[] tokens = line.split("\\s+", 2);
                if (tokens.length != 2) {
                    LOG.error("Error processing line: " + line);
                    parseErrors.add("Corrupt Line: " + line);
                } else {
                    sourceMap.put(tokens[1], tokens[0]);
                    sourceReverseMap.put(tokens[0], tokens[1]);
                }
                line = input.readLine();
            }
        } finally {
            try {
                input.close();
            } catch (IOException ignored) {
                // The list has already been read (or a read error is already
                // propagating); a failure to close must not replace that outcome.
                LOG.warn("Unable to close digest source", ignored);
            }
        }
    }
}
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package edu.umiacs.ace.monitor.compare;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
/**
 * Accumulates the findings of one comparison run.
 *
 * target = local ace install
 * source = remote/supplied digest list
 *
 * The package-private methods record findings and are meant for the code
 * performing the comparison; the public getters expose read-only views. The
 * {@code running} flag is volatile so that a thread polling
 * {@link #isRunning()} sees the change made by the thread that calls
 * {@link #finished()}, together with everything recorded before that call.
 * The sets themselves are not synchronized, so they should only be read once
 * {@link #isRunning()} returns false.
 *
 * @author toaster
 */
public final class CompareResults {

    /** Supplied paths not yet reported through {@link #fileExistsAtTarget}. */
    private final Set<String> unseenSupplied;
    /** Paths reported through {@link #sourceFileNotFound}. */
    private final Set<String> unseenTarget = new HashSet<String>();
    private final Set<DifferingName> differingNames = new TreeSet<DifferingName>();
    private final Set<DifferingDigest> differingDigests = new TreeSet<DifferingDigest>();
    /** Written by the comparing thread, read by pollers; hence volatile. */
    private volatile boolean running = true;
    /** Never assigned in this class, so {@link #getMessage()} returns null. */
    private String message = null;

    /**
     * Starts with every path of the supplied list marked as unseen.
     *
     * @param cc2 comparison whose supplied paths are copied
     */
    public CompareResults(CollectionCompare2 cc2) {
        unseenSupplied = new HashSet<String>(cc2.getSourceMap().keySet());
    }

    /** Marks the comparison as complete. */
    void finished() {
        running = false;
    }

    /**
     * @return true until {@link #finished()} has been called
     */
    public boolean isRunning() {
        return running;
    }

    /**
     * @return the message, which is currently always null
     */
    public String getMessage() {
        return message;
    }

    /**
     * Removes the path from the set of unseen supplied files.
     *
     * @param file path as it appears in the supplied list
     */
    void fileExistsAtTarget(String file) {
        unseenSupplied.remove(file);
    }

    /**
     * Adds the path to the set returned by {@link #getUnseenTargetFiles()}.
     *
     * @param file path to record
     */
    void sourceFileNotFound(String file) {
        unseenTarget.add(file);
    }

    /**
     * Records a path whose two digests differ.
     *
     * @param file path of the file
     * @param sourceDigest digest stored as the source digest
     * @param targetDigest digest stored as the target digest
     */
    void mismatchedDigests(String file, String sourceDigest, String targetDigest) {
        differingDigests.add(new DifferingDigest(file, sourceDigest, targetDigest));
    }

    /**
     * Records a digest that is known under two different names.
     *
     * @param digest the shared digest
     * @param sourcename name stored as the source name
     * @param targetName name stored as the destination name
     */
    void mismatchedNames(String digest, String sourcename, String targetName) {
        differingNames.add(new DifferingName(sourcename, targetName, digest));
    }

    /**
     * Paths from the supplied list that have not been reported through
     * {@link #fileExistsAtTarget}.
     *
     * @return read-only view of those paths
     */
    public Set<String> getUnseenSuppliedFiles() {
        return Collections.unmodifiableSet(unseenSupplied);
    }

    /**
     * Paths that were reported through {@link #sourceFileNotFound}.
     *
     * @return read-only view of those paths
     */
    public Set<String> getUnseenTargetFiles() {
        return Collections.unmodifiableSet(unseenTarget);
    }

    /**
     * @return read-only, sorted view of the recorded digest mismatches
     */
    public Set<DifferingDigest> getDifferingDigests() {
        return Collections.unmodifiableSet(differingDigests);
    }

    /**
     * @return read-only, sorted view of the recorded name mismatches
     */
    public Set<DifferingName> getDifferingNames() {
        return Collections.unmodifiableSet(differingNames);
    }

    /**
     * One digest recorded under two names. Equality, hash code and ordering
     * are all based on a key string built once in the constructor; the setters
     * do not update that key.
     */
    public static class DifferingName implements Comparable<DifferingName> {

        /** sourceName + destinationName + digest, as given at construction. */
        private String compString;
        private String sourceName;
        private String destinationName;
        private String digest;

        private DifferingName(String sourceName, String destinationName,
                String digest) {
            this.sourceName = sourceName;
            this.destinationName = destinationName;
            this.digest = digest;
            compString = sourceName + destinationName + digest;
        }

        public String getDestinationName() {
            return destinationName;
        }

        public String getDigest() {
            return digest;
        }

        public String getSourceName() {
            return sourceName;
        }

        public void setDestinationName(String destinationName) {
            this.destinationName = destinationName;
        }

        public void setDigest(String digest) {
            this.digest = digest;
        }

        public void setSourceName(String sourceName) {
            this.sourceName = sourceName;
        }

        @Override
        public int hashCode() {
            return compString.hashCode();
        }

        @Override
        public boolean equals(Object obj) {
            if (obj instanceof DifferingName) {
                DifferingName dd = (DifferingName) obj;
                return compString.equals(dd.compString);
            }
            return false;
        }

        @Override
        public int compareTo(DifferingName o) {
            return compString.compareTo(o.compString);
        }
    }

    /**
     * One name recorded with two digests. Equality, hash code and ordering are
     * all based on a key string built once in the constructor; the setters do
     * not update that key.
     */
    public static class DifferingDigest implements Comparable<DifferingDigest> {

        private String name;
        private String sourceDigest;
        private String targetDigest;
        /** name + sourceDigest + targetDigest, as given at construction. */
        private String compString;

        private DifferingDigest(String name, String sourceDigest,
                String targetDigest) {
            this.name = name;
            this.sourceDigest = sourceDigest;
            this.targetDigest = targetDigest;
            compString = name + sourceDigest + targetDigest;
        }

        public String getName() {
            return name;
        }

        public String getSourceDigest() {
            return sourceDigest;
        }

        public String getTargetDigest() {
            return targetDigest;
        }

        public void setName(String name) {
            this.name = name;
        }

        public void setTargetDigest(String targetDigest) {
            this.targetDigest = targetDigest;
        }

        public void setSourceDigest(String sourceDigest) {
            this.sourceDigest = sourceDigest;
        }

        @Override
        public int compareTo(DifferingDigest o) {
            return compString.compareTo(o.compString);
        }

        @Override
        public int hashCode() {
            return compString.hashCode();
        }

        @Override
        public boolean equals(Object obj) {
            if (obj instanceof DifferingDigest) {
                DifferingDigest dd = (DifferingDigest) obj;
                return compString.equals(dd.compString);
            }
            return false;
        }
    }
}
......@@ -154,12 +154,13 @@ public class CompareServlet extends EntityManagerServlet {
LOG.debug(
"item " + monItem + " filter " + inputFilter + " loading attached file");
CollectionCompare cc = new CollectionCompare(
CollectionCompare2 cc = new CollectionCompare2(
item.openStream(), inputFilter);
Thread t = new Thread(new TableCompareRunnable(cc, c,
CompareResults cr = new CompareResults(cc);
Thread t = new Thread(new TableCompareRunnable(cr,cc, c,
monItem), "Compare Thread " + c.getName());
t.start();
session.setAttribute(PAGE_RESULTS, cc);
session.setAttribute(PAGE_RESULTS, cr);
}
}
......@@ -169,13 +170,15 @@ public class CompareServlet extends EntityManagerServlet {
// we have no attached file, load remote
if ( !fileAttached && partner != null && remoteCollection > 0 ) {
LOG.debug("Remote digest request " + partner.getRemoteURL());
CollectionCompare cc = new CollectionCompare(
CollectionCompare2 cc = new CollectionCompare2(
JsonGateway.getGateway().getDigestList(partner,
remoteCollection), inputFilter);
Thread t = new Thread(new TableCompareRunnable(cc, c,
CompareResults cr = new CompareResults(cc);
Thread t = new Thread(new TableCompareRunnable(cr,cc, c,
monItem), "Compare Thread " + c.getName());
t.start();
session.setAttribute(PAGE_RESULTS, cc);
session.setAttribute(PAGE_RESULTS, cr);
}
} catch ( FileUploadException ful ) {
throw new ServletException(ful);
......@@ -188,12 +191,14 @@ public class CompareServlet extends EntityManagerServlet {
static class TableCompareRunnable implements Runnable {
private CollectionCompare cc;
private CollectionCompare2 cc;
private Collection c;
private MonitoredItem baseItem;
private CompareResults cr;
private TableCompareRunnable( CollectionCompare cc, Collection c,
private TableCompareRunnable( CompareResults cr,CollectionCompare2 cc, Collection c,
MonitoredItem baseItem ) {
this.cr = cr;
this.cc = cc;
this.c = c;
this.baseItem = baseItem;
......@@ -202,12 +207,13 @@ public class CompareServlet extends EntityManagerServlet {
@Override
public void run() {
try {
cc.loadCollectionTable(c, baseItem);
cc.doCompare();
cc.getUnseenTargetFiles();
cc.getUnseenSuppliedFiles();
cc.compareTo(cr,c, baseItem);
// cc.loadCollectionTable(c, baseItem);
// cc.doCompare();
// cc.getUnseenTargetFiles();
// cc.getUnseenSuppliedFiles();
} finally {
cc.cleanup();
// cc.cleanup();
}
}
}
......
......@@ -82,6 +82,8 @@ import javax.persistence.TemporalType;
"SELECT m.parentCollection, count(m) FROM MonitoredItem m WHERE m.directory = false GROUP BY m.parentCollection"),
@NamedQuery(name = "MonitoresItem.countFilesInCollection", query =
"SELECT count(m) FROM MonitoredItem m WHERE m.directory = false AND m.parentCollection = :coll"),
@NamedQuery(name = "MonitoredItem.listFilesInCollection", query =
"SELECT m FROM MonitoredItem m WHERE m.directory = false AND m.parentCollection = :coll"),
@NamedQuery(name = "MonitoresItem.countDirectoriesInAllCollections", query =
"SELECT m.parentCollection, count(m) FROM MonitoredItem m WHERE m.directory = true GROUP BY m.parentCollection"),
@NamedQuery(name = "MonitoresItem.countDirectoriesCollection", query =
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment