Commit 4d848782 authored by Eddie Schoute's avatar Eddie Schoute
Browse files

Merge branch 'release' into 'master'

Release

See merge request eschoute/reversal-sort!2
parents f7095365 eca82628
# Sorting by Reversals
Sorting by Reversals is a small python program that records the time to sort random permutations using algorithms from the Quantum routing with fast reversals paper: GDC(TBS), GDC(ATBS), OES (odd-even sort).
## Installation
Simply download the repository. No dependencies required, assuming Python3 is installed.
## Usage
### Unit tests
To run the unit tests, enter ```python -m unittest``` into the command line at the top level directory.
### Reversal routing
Entering ```echo P | python main.py``` into the command line will print out the sequence of reversals that sorts the permutation P using
GDC(TBS) by default; pass ```--sorting atbs``` to use GDC(ATBS) instead.
For example, `echo 0 1 3 2 | python main.py` will output to the console `[(2, 3)]` because a reversal starting at index 2 and ending at index 3 will
sort the permutation.
Alternatively, you can type `python main.py` and from there simply input permutations in the command line and receive the sorting sequence of reversals as output.
### Data collection
To run the program for data collection, call ```main(NUM_PERMS_PER_LENGTH, LENGTH_FROM, LENGTH_TO)``` in the ```main.py``` file, where
```NUM_PERMS_PER_LENGTH:``` number of random permutations to run the algorithms on per permutation length
```LENGTH_FROM:``` starting permutation length
```LENGTH_TO:``` ending permutation length
In other words, for each ```N``` from ```LENGTH_FROM``` to ```LENGTH_TO```, the program generates a csv file ```data/len_N_random_perms.csv``` containing ```NUM_PERMS_PER_LENGTH``` permutations.
The resulting csv files will be generated in the ```data/``` directory. Each csv file will contain a list of the permutations generated and the corresponding routing times for the three algorithms, GDC(TBS), GDC(ATBS), and OES.
For example, the following may be the output for ```data/len_10_random_perms.csv``` when ```NUM_PERMS_PER_LENGTH``` = 9.
```
Permutation,OES,GDC(TBS),GDC(ATBS)
"2, 3, 9, 6, 8, 5, 1, 10, 4, 7",7,6.666666666666667,7.0
"10, 5, 7, 2, 3, 9, 6, 1, 4, 8",10,8.666666666666666,7.666666666666667
"10, 8, 3, 1, 7, 9, 4, 5, 2, 6",10,8.0,7.666666666666667
"5, 6, 8, 3, 1, 9, 2, 10, 7, 4",7,7.666666666666667,8.0
"3, 10, 7, 9, 8, 2, 1, 6, 4, 5",9,9.0,7.666666666666667
"8, 10, 7, 6, 1, 5, 2, 4, 3, 9",9,8.0,6.666666666666667
"5, 8, 9, 2, 3, 6, 10, 1, 4, 7",7,7.666666666666667,6.333333333333333
"7, 10, 2, 5, 8, 1, 3, 9, 6, 4",8,6.666666666666667,8.666666666666666
"2, 1, 6, 3, 9, 8, 10, 4, 5, 7",5,7.333333333333333,5.0
```
import csv
from itertools import permutations
from math import factorial
import random
from reversal_sort import routing
from reversal_sort.tripartite_binary_sort import tripartite_binary_sort
from reversal_sort.adaptive_tbs import adaptive_tb_sort
"""
Collects routing time data for the algorithms GDC(TBS), GDC(ATBS), and OES.
Spits out the data into csv files in the "/data/" directory.
"""
def perm_to_str(perm):
    """
    Converts a given list or tuple permutation to a string in the proper format (comma and space separated line
    without brackets), e.g. [2, 0, 1] -> "2, 0, 1".

    Raises a TypeError (a subclass of Exception) if perm is neither a list nor a tuple.
    """
    if not isinstance(perm, (list, tuple)):
        raise TypeError('perm must be a list or a tuple')
    # Join the elements directly instead of stripping the brackets off str(perm): the latter leaves a trailing
    # comma for one-element tuples ("(1,)" -> "1,"), which corrupts the csv column.
    return ', '.join(repr(element) for element in perm)
def str_to_perm(string):
    """
    Parses a comma separated string (the format produced for the csv files) back into a list of ints.
    Whitespace around each entry is tolerated because int() ignores it.
    """
    entries = string.split(',')
    return list(map(int, entries))
def new_algs(algs, algnames, nlist):
    """
    Adds new columns to the existing data files for the values of n given in nlist.

    algs = list of algorithms as functions that take a single argument, the list to be sorted, and return the
           value to record
    algnames = list of column names for the algorithms in algs, in the same order
    nlist = list of permutation lengths whose data files should be extended

    For each n, reads "data/len_n_perms.csv" and writes "data/new_len_n_perms.csv", which is a copy of the old
    data file with one extra column per algorithm appended. The original file is left untouched.
    """
    # Same sanity check as the other data file writers: one name per algorithm
    assert (len(algs) == len(algnames))
    for n in nlist:
        # The csv module expects its files to be opened with newline='' for reading as well as writing
        with open('data/len_{:.0f}_perms.csv'.format(n), 'r', newline='') as readfile, \
                open('data/new_len_{:.0f}_perms.csv'.format(n), 'w', newline='') as writefile:
            reader = csv.reader(readfile)
            writer = csv.writer(writefile)
            # The first row is the header; an empty input file simply produces an empty output file
            header = next(reader, None)
            if header is not None:
                # Refuse to add a column that already exists
                assert (all(algname not in header for algname in algnames))
                header.extend(algnames)
                writer.writerow(header)
            for row in reader:
                perm = str_to_perm(row[0])
                # Every algorithm gets its own copy so an in-place sort cannot affect the next one
                row.extend(alg(perm.copy()) for alg in algs)
                writer.writerow(row)
        print(n, 'complete...')
def new_data_file(alglist, algnames, nlist):
    """
    Creates a new data file for the n values given in nlist with columns corresponding to the algorithms given in
    alglist.

    alglist = list of algorithms as functions that take a single argument, the list to be sorted
    algnames = list of algorithm names, as strings, in the same order as alglist
    nlist = list of permutation lengths

    For each n, every permutation of 1..n is run through every algorithm and the results are written to
    "data/len_n_perms.csv" (the "data/" folder needs to exist). Progress is printed about n times per file.
    """
    assert (len(alglist) == len(algnames))
    for n in nlist:
        with open('data/len_{:.0f}_perms.csv'.format(n), 'w', newline='') as writefile:
            writer = csv.writer(writefile)
            writer.writerow(['Permutation'] + algnames)
            total = factorial(n)
            # Exact integer step (never zero, also for n = 0) so the modulo test below cannot divide by zero
            # or suffer from float rounding for large factorials
            report_every = max(1, total // max(1, n))
            ident = [x + 1 for x in range(n)]
            # ct counts the permutations written so far, so the final report is exactly 100%
            for ct, perm in enumerate(permutations(ident), start=1):
                row = [perm_to_str(perm)]
                for alg in alglist:
                    # Each algorithm receives a fresh list because perm itself is an immutable tuple
                    row.append(alg(list(perm)))
                writer.writerow(row)
                if ct % report_every == 0:
                    print('{:.0f}% complete'.format(ct / total * 100))
def new_rand_perms_file(alglist, algnames, nlist, count):
    """
    alglist = list of algorithms as functions that take a single argument L, the list to be sorted
    algnames = list of algorithm names, as strings
    nlist = list of permutation lengths
    count = number of random perms of each length to run algorithms over

    Runs each algorithm in alglist on the same random perms and spits the data for
    each permutation length X into a csv file called
    "data/len_X_random_perms.csv"

    Note: folder "data/" needs to exist in the top level directory from where
    this script is running
    """
    assert (len(alglist) == len(algnames))
    # Report progress about every 10%; clamped to 1 because count // 10 is 0 for count < 10, which would
    # otherwise raise ZeroDivisionError in the modulo below
    report_every = max(1, count // 10)
    for n in nlist:
        with open('data/len_{:.0f}_random_perms.csv'.format(n), 'w', newline='') as writefile:
            writer = csv.writer(writefile)
            writer.writerow(['Permutation'] + algnames)
            data = []
            for i in range(count):
                perm = list(range(1, n + 1))
                random.shuffle(perm)
                row = [perm_to_str(perm)]
                # Every algorithm gets its own copy so that all of them see the same unsorted permutation
                row.extend(alg(perm.copy()) for alg in alglist)
                data.append(row)
                if i % report_every == 0:
                    print('\t{:.0f}% complete'.format(i / count * 100))
            # Rows are collected first and written in one go once all perms of this length are done
            writer.writerows(data)
        print(n, 'complete')
def main(NUM_PERMS_PER_LENGTH, LENGTH_FROM, LENGTH_TO):
    """
    Collects data for OES, GDC(TBS) and GDC(ATBS) on NUM_PERMS_PER_LENGTH random permutations of every length
    from LENGTH_FROM to LENGTH_TO (inclusive) by delegating to new_rand_perms_file.
    """
    column_names = ['OES', 'GDC(TBS)', 'GDC(ATBS)']
    routines = [routing.odd_even_sort, routing.GDC_TBS, routing.GDC_ATBS]
    lengths = list(range(LENGTH_FROM, LENGTH_TO + 1))
    new_rand_perms_file(routines, column_names, lengths, NUM_PERMS_PER_LENGTH)
# Now you can run `echo "0 1 3 2" | python main.py`
if __name__ == "__main__":
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Route a given permutation by using reversals and attempt to minimize time.")
    parser.add_argument(
        "permfile",
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help="File containing a list of permutations, one per line (default: stdin).")
    parser.add_argument(
        "--sorting",
        choices=["tbs", "atbs"],
        default="tbs",
        help="Select binary sorting routine in generic divide and conquer (default: TBS)")
    args = parser.parse_args()

    # Anything other than "atbs" (i.e. "tbs", the only other choice) selects the tripartite binary sort
    sorting = adaptive_tb_sort if args.sorting == "atbs" else tripartite_binary_sort
    router = routing.DCRoute(sorting)

    for line in args.permfile:
        perm = [int(el) for el in line.split()]
        # A valid permutation contains each of 0 .. len(perm)-1 exactly once
        if sorted(perm) != list(range(len(perm))):
            raise ValueError(f"Given permutation does not contain all elements in [0,{len(perm)-1}].")
        reversals = router.route(perm)
        print(reversals)  # TODO: Implement pretty print
\ No newline at end of file
[[package]]
name = "autopep8"
version = "1.5.5"
description = "A tool that automatically formats Python code to conform to the PEP 8 style guide"
category = "dev"
optional = false
python-versions = "*"
[package.dependencies]
pycodestyle = ">=2.6.0"
toml = "*"
[[package]]
name = "pycodestyle"
version = "2.6.0"
description = "Python style guide checker"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "toml"
version = "0.10.2"
description = "Python Library for Tom's Obvious, Minimal Language"
category = "dev"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "69f8da9bda3c52f2aeef6c0b305a37c17ebe7dbf3e7e7433d3c962a6fa6f07ed"
[metadata.files]
autopep8 = [
{file = "autopep8-1.5.5-py2.py3-none-any.whl", hash = "sha256:9e136c472c475f4ee4978b51a88a494bfcd4e3ed17950a44a988d9e434837bea"},
{file = "autopep8-1.5.5.tar.gz", hash = "sha256:cae4bc0fb616408191af41d062d7ec7ef8679c7f27b068875ca3a9e2878d5443"},
]
pycodestyle = [
{file = "pycodestyle-2.6.0-py2.py3-none-any.whl", hash = "sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367"},
{file = "pycodestyle-2.6.0.tar.gz", hash = "sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e"},
]
toml = [
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
]
[tool.poetry]
name = "reversal-sort"
version = "0.1.0"
description = ""
authors = ["None"]
[tool.poetry.dependencies]
python = "^3.8"
[tool.poetry.dev-dependencies]
autopep8 = "^1.5.5"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
import math
from .reversal import Reversal
def beginning_index_of_weight(w, v, i):
    """
    Returns the first index covered by weight i, given the weight sequence w and its cumulative sequence v
    (as produced by weights_to_cumulative).
    """
    # v[i] + v[i - 1] adds up the two running sums (one per parity) up to i; dropping w[i] itself leaves
    # the offset at which weight i starts.
    total_through_i = v[i] + v[i - 1]
    return total_through_i - w[i]
def end_index_of_weight(w, v, j):
    """
    Returns the last index covered by weight j, given the weight sequence w and its cumulative sequence v
    (as produced by weights_to_cumulative).
    """
    # The offset at which weight j + 1 starts ...
    start_of_next = v[j + 1] + v[j] - w[j + 1]
    # ... is one past the last index of weight j.
    return start_of_next - 1
def binary_to_weights(b):
    """
    Run-length encodes the binary sequence b into alternating weights: the first weight counts leading 0s, the
    next one the following 1s, and so on. The result always starts with a 0-weight and ends with a 1-weight, so
    a weight of 0 is emitted at either end when b starts with a 1 or ends with a 0.
    Returns the weights as a list of integers.
    """
    weights = []
    run_length = 0
    expected = 0  # value of the run currently being counted
    for bit in b:
        if bit != expected:
            # The current run is over: record it and start counting the other value
            weights.append(run_length)
            expected = 1 - expected
            run_length = 1
        else:
            run_length += 1
    weights.append(run_length)
    if expected == 0:
        # The last run consisted of 0s, so pad with an empty run of 1s
        weights.append(0)
    return weights
def weights_to_cumulative(w):
    """
    Converts the weight sequence w into its cumulative weight sequence: entry k is the sum of all weights up to
    and including k that sit at an index of the same parity as k.
    Returns the cumulative weights as a list of integers.
    """
    running = [0, 0]  # running[0] sums the even positions, running[1] the odd ones
    cumulative = []
    for position, weight in enumerate(w):
        parity = position % 2
        running[parity] += weight
        cumulative.append(running[parity])
    return cumulative
def adaptive_tb_sort(b):
    """
    Computes the reversals that sort the binary sequence b with a parallelized variant of the sequential binary
    sort algorithm from Bender et al.'s paper "Improved Bounds on Sorting by Length-Weighted Reversals" (the
    parallelization changes equation (6)).
    Returns the list of reversals stored in the table entry for weights 1 .. len(w) - 2 and target bit 0.
    """
    w = binary_to_weights(b)
    v = weights_to_cumulative(w)
    size = len(w)
    # A[i][j][bit] holds [cost, reversals] for the weights i..j; every cell starts out as "free, nothing to do"
    A = [[[[0, []], [0, []]] for _ in range(size)] for _ in range(size)]
    for j in range(1, size - 1):
        # Walk i downwards so that the entries for shorter ranges exist before they are needed
        for i in reversed(range(1, j)):
            for bit in (i % 2, (i + 1) % 2):
                fill_4_parallel(w, v, A, i, j, bit)
    return A[1][size - 2][0][1]
def fill_4_parallel(w, v, A, i, j, b):
    """
    Fills the table entry A[i][j][b] following equation (4) in the paper; the general case is delegated to
    fill_6_parallel, which contains the parallelized equation (6).
    """
    if j == i + 1:
        # Two neighbouring weights
        if b == i % 2:
            A[i][j][b] = [0, []]
            return
        if b == (i + 1) % 2:
            # A single reversal spanning both weights
            first = beginning_index_of_weight(w, v, i)
            last = end_index_of_weight(w, v, j)
            A[i][j][b] = [w[i] + w[j] + 1, [Reversal(first, last)]]
            return
    elif j > i + 1:
        if b == i % 2:
            # Reuse the entry of the range without weight i
            A[i][j][b] = A[i + 1][j][b]
            return
        if b == (j + 1) % 2:
            # Reuse the entry of the range without weight j
            A[i][j][b] = A[i][j - 1][b]
            return
    fill_6_parallel(w, v, A, i, j, b)
def fill_6_parallel(w, v, A, i, j, b):
    """
    Fills the table entry A[i][j][b] with a parallelized version of equation (6) in the paper: every pair (t, k)
    with i <= t < k <= j is tried, and the three subsections are combined by taking the maximum of their times
    instead of their sum. The cheapest option is stored as [cost, reversals]; if there is no pair to try, the
    entry becomes [math.inf, []].
    """
    best_cost = math.inf
    best_revs = []
    for t in range(i, j):
        # Weight index used for the right-hand part of the reversal cost (depends on t only)
        right_w = t + 1 + ((j - (t + 1)) % 2)
        for k in range(t + 1, j + 1):
            left_w = k - 1 - ((k - 1 - i) % 2)
            revcost = v[left_w] - v[i] + w[i] + v[j] - v[right_w] + w[right_w] + 1

            left_part = A[i][t][b]
            middle_part = A[t + 1][k - 1][1 - b]
            right_part = A[k][j][b]
            candidate = max(left_part[0], middle_part[0], right_part[0]) + revcost
            # Strict comparison: the first pair that reaches the minimum wins
            if candidate < best_cost:
                best_cost = candidate
                left_tail = t - ((t - i) % 2)
                right_head = k + ((j - k) % 2)
                leftind = beginning_index_of_weight(w, v, t + 1) - (v[left_tail] - v[i] + w[i])
                rightind = end_index_of_weight(w, v, k - 1) + (v[j] - v[right_head] + w[right_head])
                # The sub-sorts come first, the joining reversal last
                best_revs = left_part[1] + middle_part[1] + right_part[1] + [Reversal(leftind, rightind)]
    A[i][j][b] = [best_cost, best_revs]
"""
Interface for a Reversal Class.
"""
def rev_cost(reversal):
    """Returns the cost of the given reversal: the distance between its two end points plus two."""
    span = abs(reversal.beg - reversal.end)
    return span + 2  # Currently omits the 1/3 factor
class Reversal:
    """
    A reversal of the segment from index beg to index end (both inclusive), together with the time that is
    still needed to finish it.
    """

    def __init__(self, i, j, time=0):
        """
        Creates the reversal of positions i..j. A time of 0 (the default) means "use the cost of the reversal".
        Raises a ValueError if i is greater than j.
        """
        if i > j:
            raise ValueError("Reversal's left index greater than right index")
        self.beg = i
        self.end = j
        self.time_remaining = time if time != 0 else rev_cost(self)

    def dec_time(self, t):
        """Lowers the remaining time by t (never below zero) and returns the new remaining time."""
        self.time_remaining = max(0, self.time_remaining - t)
        return self.time_remaining

    def get_time_remaining(self):
        """Returns the time still needed to finish the reversal."""
        return self.time_remaining

    def get_length(self):
        """Returns the number of positions covered by the reversal."""
        return abs(self.beg - self.end) + 1

    def offset(self, by):
        """Returns a new Reversal whose end points are both shifted by the given amount."""
        return Reversal(self.beg + by, self.end + by)

    def apply(self, perm):
        """Performs the reversal on the given permutation, in place."""
        start, stop = self.beg, self.end + 1
        segment = perm[start:stop]
        perm[start:stop] = segment[::-1]

    def __lt__(self, other):
        # Reversals are ordered by their left end point
        return self.beg < other.beg

    def __repr__(self):
        return f"({self.beg}, {self.end})"

    def __str__(self):
        return repr(self)
def independent(rev_list, perm_length):
    """
    Determines if the reversals in the given list are independent, i.e. no position is touched by more than one
    of them. perm_length is the length of the permutation.
    """
    occupied = [False] * perm_length
    for rev in rev_list:
        for position in range(rev.beg, rev.end + 1):
            if occupied[position]:
                # An earlier reversal already claimed this position
                return False
            occupied[position] = True
    return True
def check_independent(rev, active_rev_list):
    """
    Simpler check that rev is independent of every reversal in active_rev_list, for when the latter are already
    known to be independent of each other.
    """
    # Two reversals are independent when one of them lies entirely to one side of the other
    return all(rev.beg > active.end or rev.end < active.beg for active in active_rev_list)
def is_busy(rev_list, i):
    """
    Checks whether the item at index i is busy; namely, whether i lies inside one of the reversals in rev_list.
    """
    return any(rev.beg <= i <= rev.end for rev in rev_list)
def is_sorted(permutation, start, end):
    """
    Determines if the permutation is sorted, i.e. if it is the identity permutation from start to end, inclusive.
    """
    identity = list(range(start, end + 1))
    return permutation == identity
def is_valid_permutation(permutation, start, end):
    """
    Determines if the permutation is a valid permutation on the elements from start to end, inclusive.
    """
    in_order = sorted(permutation)
    expected = list(range(start, end + 1))
    return in_order == expected
def compress_reversals(revlist, permsize):
    """
    Determines the time (in time steps) taken to perform the reversals in the given list, in the given order, in
    parallel. permsize is the length of the permutation the reversals act on.
    """
    # schedule[s][p] is 1 when position p is taking part in a reversal during time step s
    schedule = []
    for rev in revlist:
        positions = range(rev.beg, rev.end + 1)
        # Move the start back from the end of the schedule until a step is hit in which one of the needed
        # positions is already busy
        start = len(schedule)
        for step in reversed(schedule):
            if any(step[p] for p in positions):
                break
            start -= 1
        duration = rev_cost(rev)
        # Extend the schedule so that it covers the whole reversal
        while len(schedule) < start + duration:
            schedule.append([0] * permsize)
        for step in schedule[start:start + duration]:
            for p in positions:
                step[p] = 1
    return len(schedule)
class ReversalCompresser:
    """
    Alternate implementation of the compress_reversals function that simulates the chain time step by time step.
    """

    def __init__(self):
        # model[p] is 1 while position p takes part in a running reversal
        self.model = []
        # reversals that have been started but not finished yet
        self.active_revs = []
        # number of time steps simulated so far
        self.counter = 0

    def reset(self):
        """Puts the compresser back into its freshly constructed state."""
        self.__init__()

    def is_busy(self, start_index, end_index):
        """
        Checks if the chain is busy anywhere between start index and end index, inclusive.
        """
        window = self.model[start_index:end_index + 1]
        return 1 in window

    def _mark(self, start_index, end_index, value):
        # Writes value into every model cell from start_index to end_index, inclusive
        for position in range(start_index, end_index + 1):
            self.model[position] = value

    def make_busy(self, start_index, end_index):
        """Marks the positions from start index to end index, inclusive, as busy."""
        self._mark(start_index, end_index, 1)

    def make_free(self, start_index, end_index):
        """Marks the positions from start index to end index, inclusive, as free."""
        self._mark(start_index, end_index, 0)

    def independent(self, rev, revlist):
        """
        Checks if rev is independent of all reversals in revlist; None entries in revlist are ignored.
        """
        return all(
            rev.beg > other.end or rev.end < other.beg
            for other in revlist
            if other is not None
        )

    def update(self):
        """Advances the simulation by one time step and frees the positions of reversals that just finished."""
        still_running = []
        for rev in self.active_revs:
            rev.step -= 1
            if rev.step > 0:
                still_running.append(rev)
            elif rev.step == 0:
                self.make_free(rev.beg, rev.end)
        self.active_revs = still_running
        self.counter += 1

    def compress(self, revlist, permsize):
        """
        Returns the number of time steps needed to perform the reversals in revlist on a chain of permsize
        positions. A reversal is started as soon as its positions are free and it is independent of every
        reversal that precedes it in the list and has not been started yet. Each started reversal gets a
        step attribute that counts down its remaining time steps.
        """
        pending = revlist[:]
        self.reset()
        self.model = [0] * permsize
        self.active_revs = []
        while pending:
            for index, rev in enumerate(pending):
                if self.is_busy(rev.beg, rev.end):
                    continue
                if not self.independent(rev, pending[:index]):
                    continue
                self.make_busy(rev.beg, rev.end)
                self.active_revs.append(rev)
                rev.step = rev.get_length() + 1  # to account for the + 1 in the cost function
                # Started reversals are blanked out here and dropped once the scan is over
                pending[index] = None
            pending = [rev for rev in pending if rev is not None]
            self.update()
        # wait for final reversals to finish, if there are any
        while self.active_revs:
            self.update()
        return self.counter
from . import reversal
def median(L):
    """Returns the index at which L is split into a lower and an upper half, i.e. half its length rounded down."""
    half, _ = divmod(len(L), 2)
    return half
def perm_to_01(L):
    """
    Turns a permutation L into a sequence of 0s and 1s of the same length: an element x becomes 1 if x + 1 is
    greater than median(L) and 0 otherwise.
    """
    # Note that permutations are 0-indexed so we need to shift by one.
    threshold = median(L)
    return [1 if value + 1 > threshold else 0 for value in L]
class DCRoute:
    """Divide and Conquer routing algorithm"""

    def __init__(self, sorting_alg):
        """Stores the binary sorting algorithm that is used at every level of the divide and conquer."""
        self.alg = sorting_alg

    def route(self, L):
        """
        Routes the given permutation L with a divide and conquer approach: the 0/1 image of L is sorted with
        the configured algorithm, the resulting reversals are applied to L in place, and the two halves are
        then routed recursively. Returns the list of all reversals used, those of the right half shifted to
        their position in L.

        L must contain all elements in [0, len(L)-1] once.
        """
        if len(L) <= 1:
            return []

        top_revs = self.alg(perm_to_01(L))
        for rev in top_revs:
            rev.apply(L)

        split = median(L)
        lower_revs = self.route(L[:split])
        # The upper half is shifted down so that it is again a permutation starting at 0 ...
        upper_half = [value - split for value in L[split:]]
        # ... and its reversals are shifted back up afterwards
        upper_revs = [rev.offset(split) for rev in self.route(upper_half)]
        return top_revs + lower_revs + upper_revs
def odd_even_sort(L):