Commit f7095365 authored by Eddie Schoute

Expand on related work and fix optimization function

parent b7478ea9
eprinttype = {arxiv},
}
@Article{Bender2008,
author = {Bender, Michael A. and Ge, Dongdong and He, Simai and Hu, Haodong and Pinter, Ron Y. and Skiena, Steven and Swidan, Firas},
date = {2008-08},
journaltitle = {Journal of Computer and System Sciences},
title = {Improved bounds on sorting by length-weighted reversals},
doi = {10.1016/j.jcss.2007.08.008},
number = {5},
pages = {744--774},
volume = {74},
publisher = {Elsevier},
}
@Article{Pinter2002,
author = {Pinter, Ron Y. and Skiena, Steven},
date = {2002},
journaltitle = {Genome Informatics},
title = {Genomic sorting with length-weighted reversals},
doi = {10.11234/gi1990.13.103},
pages = {103--111},
volume = {13},
publisher = {Japanese Society for Bioinformatics},
}
@Article{Blanchette1996,
author = {Blanchette, Mathieu and Kunisawa, Takashi and Sankoff, David},
date = {1996-01},
journaltitle = {Gene},
title = {Parametric genome rearrangement},
doi = {10.1016/0378-1119(95)00878-0},
number = {1},
pages = {GC11--GC17},
volume = {172},
publisher = {Elsevier},
}
@Book{Knuth1998,
author = {Knuth, Donald E.},
date = {1998},
title = {The Art of Computer Programming},
edition = {2},
isbn = {0201896850},
pagetotal = {800},
publisher = {Addison-Wesley},
subtitle = {Sorting and Searching},
volume = {3},
ean = {9780201896855},
}
@Article{Kececioglu1995,
author = {Kececioglu, John and Sankoff, David},
date = {1995-02},
journaltitle = {Algorithmica},
title = {Exact and approximation algorithms for sorting by reversals, with application to genome rearrangement},
doi = {10.1007/bf01188586},
number = {1-2},
pages = {180--210},
volume = {13},
publisher = {Springer Science and Business Media {LLC}},
}
@Comment{jabref-meta: databaseType:biblatex;}
A reversal $\rev{i,j}$ needs an amount of time%
to be implemented, where $m=|j-i|+1$ is the number of nodes in the reversed segment, and $p(m) \coloneqq m \pmod{2}$ is the parity of $m$.
This is a bit of a mouthful to work with, so it may help to simply approximate this by $(m+1)/3$.
\paragraph{Our goals:} We are interested in routing any given permutation $\pi$ in a \emph{time} that is minimal.
We call the reversals $\rev{i,j}$ and $\rev{k,l}$ with $i<j<k<l$ \emph{independent}
and allow them to be performed in parallel.
We wish to minimize the time taken to implement any permutation, allowing for concurrent independent reversals.
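As a rough, non-authoritative illustration of this objective, the following Python sketch computes the time of a candidate schedule under the assumptions that reversals are grouped into layers of pairwise independent reversals, that a layer takes as long as its slowest reversal, that layers execute sequentially, and that a reversal of $m$ nodes costs approximately $(m+1)/3$ as above.
\begin{verbatim}
def reversal_cost(i, j):
    # Approximate cost of rev(i, j): (m + 1) / 3, with m = |j - i| + 1.
    m = abs(j - i) + 1
    return (m + 1) / 3

def is_independent(layer):
    # A layer is a list of reversals (i, j); independent means no two overlap.
    intervals = sorted((min(i, j), max(i, j)) for i, j in layer)
    return all(b < c for (_, b), (c, _) in zip(intervals, intervals[1:]))

def parallel_time(layers):
    # Each layer costs as much as its slowest reversal; layers run one after another.
    assert all(is_independent(layer) for layer in layers)
    return sum(max(reversal_cost(i, j) for i, j in layer) for layer in layers)

# Example: two independent reversals in one layer, followed by one long reversal.
print(parallel_time([[(0, 2), (4, 7)], [(0, 7)]]))
\end{verbatim}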
So, we have a few goals in this project:
\begin{enumerate}
\item Design an algorithm (or algorithms!) that, given a permutation $\pi$ as input, implements $\pi$ using weighted reversals on the path.
\item Give a bound on the runtime of the algorithm.
\end{enumerate}
\section{Warm-up}
\paragraph{Routing with swaps:} Let's simplify the problem. Suppose you can only use nearest-neighbor reversals (i.e., swaps) to permute tokens, and each swap takes time 1. What is the minimum time necessary to implement any permutation on the path this way? Note the close relation to sorting. In fact, the problem is the same: the list indices label the nodes of a path, and the (unsorted) integer at index $i$ labels the token $t(i)$. However, sorting measures time by the total number of operations, while our time is measured in circuit depth. In bubble sort, for example, you compare neighboring entries and swap them if they are out of order; this takes $O(n^2)$ operations to sort any list, but again our cost is circuit depth rather than operation count. It turns out that the list can be sorted in \emph{time} $n-o(n)$, where a single swap has time cost 1. The best known algorithm for this is the odd-even sort, which you can read about in~\cite[Ch.~5.3.4]{Knuth1998}.
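Before the exercises below, here is a small Python sketch (not from the original text) of odd-even transposition sort viewed as a routing procedure: in alternating layers we compare disjoint adjacent pairs and swap any pair that is out of order, so every layer has unit cost and $n$ layers suffice.
\begin{verbatim}
def odd_even_sort(tokens):
    # Route tokens to sorted order with n rounds of disjoint adjacent swaps;
    # each round is one unit-cost layer on the path.
    tokens = list(tokens)
    n = len(tokens)
    layers = []
    for r in range(n):
        layer = []
        for i in range(r % 2, n - 1, 2):
            if tokens[i] > tokens[i + 1]:
                tokens[i], tokens[i + 1] = tokens[i + 1], tokens[i]
                layer.append((i, i + 1))
        layers.append(layer)
    return tokens, layers

# Example: route the fully reversed permutation on 6 nodes.
result, swap_layers = odd_even_sort([5, 4, 3, 2, 1, 0])
assert result == [0, 1, 2, 3, 4, 5]
\end{verbatim}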
\begin{enumerate}
\item Check that odd-even sort routes any permutation correctly.
\item Show that using only swap, the worst-case time is at least $n-1$.
\item Using this information, can you give a lower bound on the routing time using weighted reversals?
\end{enumerate}
\subsection{Related Work}
Permutation via weighted reversal has been studied in the context of gene sequencing.
In particular, prior work considers sorting binary strings by sequences of reversals~\cite{Kececioglu1995}.
While we do not consider binary strings exactly,
we could define an indicator function assigning either a ``0'' or a ``1''
to the destinations $V$ and then use these algorithms for sorting by reversals.
For example, we could use the indicator function
\begin{equation}
I(v) = \begin{cases*}
0 & if $v < n/2$ \\
1 & otherwise
\end{cases*}
\end{equation}
to sort tokens onto either half of the graph, and then recursively sort both halves in a similar fashion.
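A minimal Python sketch of this recursion is given below; the helper \texttt{sort\_binary\_by\_reversals} is a hypothetical black box (not defined here) standing in for the cited sorting-by-reversals algorithms, returning the reversals that sort a given 0/1 string.
\begin{verbatim}
def route_by_halving(tokens, offset=0, schedule=None):
    # Recursively route tokens (destination labels) on the path segment
    # starting at `offset`; `schedule` collects the reversals performed.
    if schedule is None:
        schedule = []
    n = len(tokens)
    if n <= 1:
        return schedule
    mid = n // 2
    # Indicator: 0 if the destination lies in the left half, 1 otherwise.
    bits = [0 if t - offset < mid else 1 for t in tokens]
    # Hypothetical black box: reversals (index pairs) that sort `bits`.
    for i, j in sort_binary_by_reversals(bits):
        tokens[i:j + 1] = reversed(tokens[i:j + 1])
        schedule.append((offset + i, offset + j))
    route_by_halving(tokens[:mid], offset, schedule)
    route_by_halving(tokens[mid:], offset + mid, schedule)
    return schedule
\end{verbatim}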
Moreover, our reversal time scales with its length,
which is more accurately represented by a length-weighted reversal~\cite{Pinter2002}.
As it turns out, we are in the linearly-scaling regime of~\cite{Bender2008}.
Unfortunately, their time cost is the sum of the times of all operations (i.e., sequential execution),
whereas we are interested in minimizing the parallel execution time as much as possible.
The parallel time seems to behave quite differently from the sequential time,
and this is probably the most significant difference from prior work.
\printbibliography%
\end{document}