torchlight/loopover.py

## loopover.py
"""
Loopover solver for arbitrary sizes; asymptotically optimal in STM up to log factors.

Let N be the total number of cells.

This solver uses Shell sort with 3-smooth gaps to sort all of the pieces. For a specific gap, the
swaps performed are all "disjoint" and we apply O(N) setup moves so that all the swaps can be
performed in O(N) moves. Since there are only O(log^2 N) gaps, this leads to an O(N log^2 N) solving
algorithm.

There are some technical details relating to permutation parity, which makes the naive idea not
quite work, but these can be worked around.
"""

import itertools
import math
import random

def remainder(x, y):
    fudge = y - y//2 - 1
    return (x + fudge) % y - fudge

def move_row(board, row_index, amount):
    amount %= len(board[0])
    if amount == 0:
        return
    board[row_index] = board[row_index][-amount:] + board[row_index][:-amount]

def move_col(board, col_index, amount):
    amount %= len(board)
    if amount == 0:
        return
    col = [row[col_index] for row in board]
    col = col[-amount:] + col[:-amount]
    for (row, x) in zip(board, col):
        row[col_index] = x

def invert_move_sequence(seq):
    return tuple((dir, index, -amount) for (dir, index, amount) in seq[::-1])

def apply_move_sequence(board, seq):
    move_funs = {'r': move_row, 'c': move_col}
    for (dir, index, amount) in seq:
        move_funs[dir](board, index, amount)

def normalise_move_sequence(rows, cols, seq):
    return tuple(filter(lambda x: x[2] != 0, ((d, i, remainder(a, rows if d == 'c' else cols)) for (d, i, a) in seq)))

def transpose_move_sequence(seq):
    switch = {'r': 'c', 'c': 'r'}
    return tuple((switch[d], i, a) for (d, i, a) in seq)

def len_move_sequence(seq):
    return sum(abs(a) for (d, i, a) in seq if a != 0)

def create_board(rows, cols):
    # note that rows = height, cols = width
    return [[c + cols*r for c in range(cols)] for r in range(rows)]

def create_random_board(rows, cols, random=random):
    if rows > 1 or cols > 1:
        l = list(range(rows * cols))
        random.shuffle(l)
        if rows % 2 == cols % 2 == 1 and permutation_parity(l) == 1:
            l[0], l[1] = l[1], l[0]
    else:
        n = rows * cols
        r = random.randrange(n)
        l = [(i+r)%n for i in range(n)]
    return [l[cols*r:cols*(r+1)] for r in range(rows)]

def copy_board(board):
    return [row[:] for row in board]

def flatten_board(board):
    flattened = []
    for row in board:
        flattened.extend(row)
    return flattened

def prettyprint(board):
    print('\n'.join('\t'.join(map(str, row)) for row in board))

def decompose_inverse_permutation(perm):
    """
    Given a permutation (given as a list of integers), decompose its inverse into a product of
    involutions $\pi^{-1} = \pi_0 \pi_1 \pi_2 \dots$ (given as a list of lists of swaps) such that
    each $\pi_i$ has "uniform-distance" swaps, i.e. it is of the form
    $\pi_i = (a_0, a_0+d) (a_1, a_1+d) \dots$

    (We can obtain a decomposition for the input permutation $\pi$ itself by reversing the
    decomposition, since the decomposition consists solely of transpositions. However, we have no
    use for this.)

    This is currently done with Shellsort with 3-smooth gaps; see:
    http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/shell/shellen.htm
    Odd-even merge sort would also work, and I believe bitonic merge sort (in its bitonic,
    non-monotonised form) should also work, but those are more complicated and only get us (maybe)
    a constant factor improvement in the number of swaps.
    """

    if len(perm) == 1:
        return []

    perm = list(perm) # make a copy that we can mess around with
    n = len(perm)
    decomposition = []

    gap = 1
    # determine the initial gap
    while 3*gap < n:
        gap *= 3
    if 2*gap < n:
        gap *= 2

    # loop over the 3-smooth numbers
    while True:
        stage = []
        # while the *comparisons* with 3-smooth Shell sort aren't disjoint, the resulting *swaps*
        # always are because an element can't move more than once for each gap.
        for i in range(n-gap):
            if perm[i] > perm[i+gap]:
                perm[i], perm[i+gap] = perm[i+gap], perm[i]
                stage.append((i, i+gap))
        if stage:
            decomposition.append(tuple(stage))

        # if the gap is 1, we're done
        if gap == 1:
            break
        # if not, update the gap
        elif gap % 2 == 0:
            gap //= 2
        else:
            gap = gap // 3
            while 2*gap < n:
                gap *= 2
        # note that the gaps are not monotonically decreasing; the gap sequence we use is
        # the reverse of:
        # 1, 2, 4, 8, ...,  3, 6, 12, 24, ...,  9, 18, 36, 72, ..., ...
        # this still works for Shell sort because every time we hit a gap g, we've already sorted by
        # 2g and by 3g.

    return tuple(decomposition)

def permutation_parity(perm):
    return sum(map(len, decompose_inverse_permutation(perm))) % 2

"""
These solving functions are *in-place* and return the move sequence used.

We assume the dimensions are at least 2 on each side. Silly/weird things can happen on 1xn or nx1
boards, so don't do that.
"""
def solve_piece_in_first_row(board, index):
    """
    Solve a piece that belongs in the first row.

    Constraints:
    - index < cols
    - all pieces before index in the first row are already solved

    Maximum move count:
    floor(rows/2) + floor(cols/2) + 1
    """
    rows, cols = len(board), len(board[0])
    ((r, c), ) = ((r, c) for (r, row) in enumerate(board) for (c, x) in enumerate(row) if x == index)
    if r == 0 and c == index:
        return ()
    if index == 0:
        # first piece; don't need to care about preserving anything
        move_row(board, r, -c)
        move_col(board, 0, -r)
        if r == 0:
            return (('r', r, -c), )
        if c == 0:
            return (('c', 0, -r), )
        return (('r', r, -c), ('c', 0, -r))

    sol = []
    if r == 0:
        # not the first piece, and it's stuck in the first row at the wrong position
        move_col(board, c, 1)
        sol.append(('c', c, 1))
        r += 1

    # slide the piece to the correct column
    if c != index:
        move_row(board, r, index-c)
        sol.append(('r', r, index-c))
    # then slide it to the top row
    move_col(board, index, -r)
    sol.append(('c', index, -r))
    return tuple(sol)

def solve_phase1(board):
    """
    Solve the first two pieces of the first row and parity.

    Maximum move count:
    rows + cols + 2
    """
    solution = solve_piece_in_first_row(board, 0)
    solution += solve_piece_in_first_row(board, 1)
    if permutation_parity(flatten_board(board)) == 1:
        # if we have odd parity, at least one of the two dimensions must be even
        # (otherwise every board state would have even parity)
        rows = len(board)
        cols = len(board[0])
        if cols % 2 == 0:
            solution += (('r', 1, 1), )
            move_row(board, 1, 1)
        else:
            # if cols is odd, then it must be at least 3 so moving column 2 won't affect the two
            # pieces we just solved.
            solution += (('c', 2, 1), )
            move_col(board, 2, 1)
    return solution

def solve_phase2(board):
    """
    Solve the rest of the pieces once the first two pieces and parity are solved.

    Maximum move count:
    <= 19.25/((log 2)(log 3)) * N * log(N)^2 * (1+o(1)) if rows = O(1) or cols = O(1)
    or 11/ ... if rows, cols = o(N)
    """
    rows, cols = len(board), len(board[0])
    stages = decompose_inverse_permutation(flatten_board(board))
    sol = []
    for stage in stages:
        diff = stage[0][1]-stage[0][0] # the gap (must be the same for all stage[i][1]-stage[i][0])
        stage_coords = tuple((divmod(a, cols), divmod(b, cols)) for (a, b) in stage)
        swaps_nooverflow = tuple((a, b) for (a, b) in stage_coords if b[0]-a[0] == diff // cols)
        swaps_overflow = tuple((a, b) for (a, b) in stage_coords if b[0]-a[0] == diff // cols + 1)
        assert len(swaps_nooverflow) + len(swaps_overflow) == len(stage_coords)

        seq = get_uniform_swaps_move_sequence(rows, cols, swaps_nooverflow)
        apply_move_sequence(board, seq)
        sol.extend(seq)

        seq = get_uniform_swaps_move_sequence(rows, cols, swaps_overflow)
        apply_move_sequence(board, seq)
        sol.extend(seq)
    return tuple(sol)

def solve(board):
    rows, cols = len(board), len(board[0])
    return normalise_move_sequence(rows, cols, solve_phase1(board) + solve_phase2(board))

def get_3cycle_move_sequence(rows, cols, a, b, c):
    """
    Apply a 3-cycle moving the piece at position a to position b to position c to position a. The
    positions are specified as (row, column) coordinates.

    Maximum move count:
    ||a-b|| + ||b-c|| + ||c-a|| + 2 for non-general-position
    <= 3/2 (||a-b|| + ||b-c|| + ||c-a||)
    <= 3/2 (rows + cols)

    3/2 (||a-b|| + ||b-c|| + ||c-a||) for general position
    <= 3/2 (rows + cols)
    """
    if a[0] == b[0] == c[0]:
        # all in the same row; move one piece into a separate row
        return (('c', c[1], 1), ) + get_3cycle_move_sequence_row_interchange(a, b, ((c[0]+1) % rows, c[1])) + (('c', c[1], -1), )

    # check if two points are in the same row; if so, then cycle the points so that the first two
    # are in the same row
    if a[0] == b[0]:
        return get_3cycle_move_sequence_row_interchange(a, b, c)
    if b[0] == c[0]:
        return get_3cycle_move_sequence_row_interchange(b, c, a)
    if c[0] == a[0]:
        return get_3cycle_move_sequence_row_interchange(c, a, b)

    if a[1] == b[1] or b[1] == c[1] or c[1] == a[1]:
        # two pieces in the same column; transpose and call into the above
        return transpose_move_sequence(get_3cycle_move_sequence(cols, rows, a[::-1], b[::-1], c[::-1]))

    # otherwise, the pieces are "in general position" (all rows distinct, all columns distinct)
    # if the sum of vertical distances is smaller than the sum of horizontal distances, move b to
    # the same row as a, then do a standard 3-cycle; otherwise, transpose first, then do that.
    sum_rowdist = sum(abs(remainder(x, rows)) for x in (a[0]-b[0], b[0]-c[0], c[0]-a[0]))
    sum_coldist = sum(abs(remainder(x, cols)) for x in (a[1]-b[1], b[1]-c[1], c[1]-a[1]))
    if sum_rowdist <= sum_coldist:
        return (('c', b[1], a[0]-b[0]), ) + get_3cycle_move_sequence_row_interchange(a, (a[0], b[1]), c) + (('c', b[1], b[0]-a[0]), )
    return (('r', b[0], a[1]-b[1]), ) + transpose_move_sequence(get_3cycle_move_sequence_row_interchange(a[::-1], (a[1], b[0]), c[::-1])) + (('r', b[0], b[1]-a[1]), )

def get_3cycle_move_sequence_row_interchange(a, b, c):
    """
    Assume a[0] == b[0] != c[0].

    Maximum move count:
    ||a-b|| + ||b-c|| + ||c-a||
    <= rows + cols
    (since the sum of horizontal distances <= cols and likewise in the other direction)
    """
    seq = (
        ('r', a[0], c[1]-a[1]), # move A to same col as C
        ('c', c[1], a[0]-c[0]), # move C to take A's place
        ('r', a[0], a[1]-b[1]), # move B to where C is now
        ('c', c[1], c[0]-a[0]), # move B to C's initial position
        ('r', a[0], b[1]-c[1]), # move A and C to B's and A's initial positions
    )
    return tuple((d, i, a) for (d, i, a) in seq if a != 0)

def check_3cycles(rows=5, cols=6):
    coordinates = ((r, c) for r in range(rows) for c in range(cols))
    for (a, b, c) in itertools.permutations(coordinates, 3):
        board = [[(r, c) for c in range(cols)] for r in range(rows)]
        seq = get_3cycle_move_sequence(rows, cols, a, b, c)
        apply_move_sequence(board, seq)
        if board[a[0]][a[1]] != c or board[b[0]][b[1]] != a or board[c[0]][c[1]] != b:
            prettyprint(board)
            print(a, b, c)
            return False
    return True

def get_2_2cycle_move_sequence(rows, cols, a, b, c, d):
    """
    Swap a <-> b and c <-> d by using two 3-cycles.
    Maximum move count:
    3/2 (||a-b|| + ||c-d|| + 2||a-c|| + ||b-c|| + ||a-d||)
    <= 3||a-b|| + 3||c-d|| + 6||a-c||
    """
    return get_3cycle_move_sequence(rows, cols, a, b, c) + get_3cycle_move_sequence(rows, cols, a, d, c)

def check_2_2cycles(rows=5, cols=5):
    coordinates = ((r, c) for r in range(rows) for c in range(cols))
    for (a, b, c, d) in itertools.permutations(coordinates, 4):
        board = [[(r, c) for c in range(cols)] for r in range(rows)]
        seq = get_2_2cycle_move_sequence(rows, cols, a, b, c, d)
        apply_move_sequence(board, seq)
        if board[a[0]][a[1]] != b or board[b[0]][b[1]] != a or board[c[0]][c[1]] != d or board[d[0]][d[1]] != c:
            prettyprint(board)
            print(a, b, c, d)
            return False
    return True

def get_uniform_swaps_move_sequence(rows, cols, swaps):
    """
    Assume the displacement for each swap is equal.

    If there is an even number of swaps, we pair them up and apply them one by one.

    If there is an odd number of swaps, we pair up the first swap with ((0,0), (0,1)), and pair up
    the rest as usual.

    Naively applying the swaps leads to O(max(rows,cols)) moves per swap and O(max(rows,cols)*N) for
    the whole thing, so we use O(N) setup moves to force most swaps (except possibly the first one)
    to be of displacement (1, 1) (i.e. each swap is of a piece and the one to its bottom-right). As
    long as we pair up the swaps in a reasonable manner (e.g. raster scan (= lexicographic order) or
    serpentine scan), we can do all of the swaps in O(N) moves.

    If rows = cols, then the shearing procedure used here will convert every swap to a (1,1) swap,
    but in general there can be either O(rows) swaps of distance O(cols), or O(cols) swaps of
    distance O(rows), because of boundary-related weirdness. We handle all of the (1,1) swaps first,
    then repeat the shearing procedure to convert the stragglers to (1,1) swaps as well.

    (We could also do minimum-weight matchings, but the algorithms for that are kinda non-trivial
    and it's not super important anyway.)

    Maximum move count:

    Initial swap to handle parity:
    At most 4 + 4.5*(rows+cols) moves.

    The swaps that get set up to (1,1) displacement:
    At most N/4 pairs of swaps, each taking at most 12 + 6*dist moves where sum(dist) <= N.
    This gives (N/4) * 12 + 6 * N <= 9 * N.

    The remaining swaps:
    All such swaps are either in the leftmost and rightmost columns, or in the top and bottom rows.
    This leaves at most max(rows, cols)/2 pairs of swaps, each taking at most 12 + 6*dist' moves
    where sum(dist') <= max(rows, cols). This gives max(rows, cols)/2 * 12 + 6 * max(rows, cols)
    <= 12 * max(rows, cols).

    Setup moves:
    Each row is moved by at most cols/2 and each column is moved by at most rows/2, for a total of N
    moves. (The setup moves also have to be undone at the end, so this really contributes 2*N.)

    Adding everything together: 3 + 4.5*(rows+cols) + 9*N + 12*max(rows, cols) + 2*N
    = 3 + 4.5*rows + 4.5*cols + 12*max(rows, cols) + 11*N
    """
    if len(swaps) == 0:
        return ()
    seq = []
    if len(swaps) % 2 == 1:
        # if we have an odd number of swaps, combine the first swap with a swap of (0,1);
        # this is not necessarily uniform, but it doesn't matter because we do this at most once.
        # this costs <= 3 + 4.5*rows + 4.5*cols moves.
        seq.extend(get_2_2cycle_move_sequence(rows, cols, (0, 0), (0, 1), *swaps[0]))
        swaps = swaps[1:]
    if len(swaps) == 0:
        return tuple(seq)

    # the displacement of each swap
    dr, dc = swaps[0][1][0]-swaps[0][0][0], swaps[0][1][1]-swaps[0][0][1]
    #print(dr, dc)

    setup_moves = []

    swaps = [(list(a), list(b)) for (a, b) in swaps]
    if dr != 0:
        # the swaps are not purely horizontal
        if dc != 1:
            block_size = abs(dr)
            adjust = 1 - dc
            # shear the whole board along blocks of rows to make it so that the horizontal
            # displacement is +1 for every swap (without affecting vertical displacement)
            for row_base in range(0, rows, block_size):
                shift_amount = ((row_base // block_size) * adjust) % cols
                for row_index in range(row_base, min(rows, row_base+block_size)):
                    setup_moves.append(('r', row_index, shift_amount))
            for swap in swaps:
                for coords in swap:
                    coords[1] += (coords[0] // block_size) * adjust
                    coords[1] %= cols
                assert (swap[1][1] - swap[0][1]) % cols == 1
        if dr != 1:
            # then shear along columns to make it so that the vertical displacement is +1, except
            # possibly for the swaps that wrap around horizontally.
            for col_index in range(cols):
                setup_moves.append(('c', col_index, col_index*(1-dr)))
            for swap in swaps:
                for coords in swap:
                    coords[0] += coords[1]*(1-dr)
                    coords[0] %= rows
                if swap[1][1] != 0:
                    # check if the swap straddles the left/right border; if it does, then the
                    # following assert can fail so we ignore that.
                    assert (swap[1][0] - swap[0][0]) % rows == 1
    else:
        # otherwise, the swaps are horizontal; essentially just do the above but transposed
        block_size = abs(dc)
        adjust = 1 # 1-dr = 1 since dr = 0
        # shear along blocks of columns
        for col_base in range(0, cols, block_size):
            shift_amount = ((col_base // block_size) * adjust) % rows
            for col_index in range(col_base, min(col_base+block_size, cols)):
                setup_moves.append(('c', col_index, shift_amount))
        for swap in swaps:
            for coords in swap:
                coords[0] += (coords[1] // block_size) * adjust
                coords[0] %= rows
            assert (swap[1][0] - swap[0][0]) % rows == 1
        if dc != 1:
            # then shear along rows
            for row_index in range(rows):
                setup_moves.append(('r', row_index, row_index*(1-dc)))
            for swap in swaps:
                for coords in swap:
                    coords[1] += coords[0]*(1-dc)
                    coords[1] %= cols
                if swap[1][0] != 0:
                    assert (swap[1][1] - swap[0][1]) % cols == 1
    seq.extend(setup_moves)
    #print(setup_moves)

    swaps_11 = [(a, b) for (a, b) in swaps if (b[0]-a[0])%rows == (b[1]-a[1])%cols == 1]
    swaps_rest = [(a, b) for (a, b) in swaps if not ((b[0]-a[0])%rows == (b[1]-a[1])%cols == 1)]
    if len(swaps_11) % 2 == 1:
        # if we have an odd number of exceptional swaps (which also means an odd number of (1,1)
        # swaps), then we pull one out from each list and apply those swaps first, then pair up
        # the rest.
        # this costs <= 8 + 3*(rows+cols)
        seq.extend(get_2_2cycle_move_sequence(rows, cols, *(swaps_11[-1] + swaps_rest[-1])))
        swaps_11 = swaps_11[:-1]
        swaps_rest = swaps_rest[:-1]

    # the shearing might leave the swaps in a weird order, so we just sort them to ensure that when
    # we pair up the swaps, they're "not too far apart". while we can't bound the distance between
    # the swaps in each pair, we can bound the *sum* of those distances if we use serpentine scan.
    swaps_11.sort(key=lambda swap: (swap[0][0], swap[0][1]*(-1)**swap[0][0]))

    # actually do the swaps (in pairs, because we can't just do one swap with only 3-cycles)
    for (swap0, swap1) in zip(swaps_11[0::2], swaps_11[1::2]):
        seq.extend(get_2_2cycle_move_sequence(rows, cols, *(swap0 + swap1)))

    if len(swaps_rest) > 0:
        #print(swaps_rest)
        # there should be only one possible displacement among the exceptional swaps
        assert len(set(((b[0]-a[0])%rows, (b[1]-a[1])%cols) for (a, b) in swaps_rest)) == 1
        dr_ex = (swaps_rest[0][1][0]-swaps_rest[0][0][0])%rows
        dc_ex = (swaps_rest[0][1][1]-swaps_rest[0][0][1])%cols
        if dr_ex == 1:
            # shear horizontally to bring horizontal displacement to +1
            assert len(set(b[0] for (a, b) in swaps_rest)) == 1 # check that they're all in the same row
            row_index = swaps_rest[0][1][0]
            move = ('r', row_index, 1-dc_ex)
            seq.append(move)
            setup_moves.append(move)
        else:
            # dc_ex = 1
            # shear vertically to bring vertical displacement to +1
            assert len(set(b[1] for (a, b) in swaps_rest)) == 1 # check that they're all in the same column
            col_index = swaps_rest[0][1][1]
            move = ('c', col_index, 1-dr_ex)
            seq.append(move)
            setup_moves.append(move)
        # this setup takes at most max(rows/2,cols/2) moves
        swaps_rest.sort()
        for (swap0, swap1) in zip(swaps_rest[0::2], swaps_rest[1::2]):
            swap0[1][0] = (swap0[0][0] + 1) % rows
            swap0[1][1] = (swap0[0][1] + 1) % cols
            swap1[1][0] = (swap1[0][0] + 1) % rows
            swap1[1][1] = (swap1[0][1] + 1) % cols
            seq.extend(get_2_2cycle_move_sequence(rows, cols, *(swap0 + swap1)))
            # each pair of swaps here takes at most 8+4*dist where sum(dist) < max(rows,cols)

    # then undo the setup moves
    seq.extend(invert_move_sequence(setup_moves))

    return tuple(seq)

## loopover2xn.py
"""
Loopover solver for 2 × n (2 rows, n columns).

Uses O(n log n) moves with a divide-and-conquer strategy: we sort the pieces into the correct
columns a la quicksort. Shift all pieces that belong in the left half to the left and all pieces
that belong in the right half to the right, then repeat this on the smaller segments. This
eventually results in every piece being in the correct column, at which point we may apply column
moves to fix the columns that need fixing, and we're done.

We place some restrictions on the moves we will be making, to prevent pieces from one segment from
mixing with another:
1. The top row never moves.
2. The bottom row is allowed to move, but we will ignore the horizontal looping completely.
3. More precisely, we will track the amount the bottom row has moved as an integer in the range
   [-n+1, n-1] (inclusive on both ends, positive = shifted right), and only allow column moves in
   columns c where both c and c+shift are in-bounds.
   If shift > 0, this means shift <= c <= n-1; if shift < 0, this means 0 <= c <= n-1+shift.
"""

import itertools
import math
import random

def remainder(x, y):
    fudge = y - y//2 - 1
    return (x + fudge) % y - fudge

def move_row(board, row_index, amount):
    amount %= len(board[0])
    if amount == 0:
        return
    board[row_index] = board[row_index][-amount:] + board[row_index][:-amount]

def move_col(board, col_index, amount):
    amount %= len(board)
    if amount == 0:
        return
    col = [row[col_index] for row in board]
    col = col[-amount:] + col[:-amount]
    for (row, x) in zip(board, col):
        row[col_index] = x

def invert_move_sequence(seq):
    return tuple((dir, index, -amount) for (dir, index, amount) in seq[::-1])

def apply_move_sequence(board, seq):
    move_funs = {'r': move_row, 'c': move_col}
    for (dir, index, amount) in seq:
        move_funs[dir](board, index, amount)

def normalise_move_sequence(rows, cols, seq):
    return tuple(filter(lambda x: x[2] != 0, ((d, i, remainder(a, rows if d == 'c' else cols)) for (d, i, a) in seq)))

def transpose_move_sequence(seq):
    switch = {'r': 'c', 'c': 'r'}
    return tuple((switch[d], i, a) for (d, i, a) in seq)

def len_move_sequence(seq):
    return sum(abs(a) for (d, i, a) in seq if a != 0)

def create_board(rows, cols):
    # note that rows = height, cols = width
    return [[c + cols*r for c in range(cols)] for r in range(rows)]

def create_random_board(rows, cols, random=random):
    if rows > 1 or cols > 1:
        l = list(range(rows * cols))
        random.shuffle(l)
        if rows % 2 == cols % 2 == 1 and permutation_parity(l) == 1:
            l[0], l[1] = l[1], l[0]
    else:
        n = rows * cols
        r = random.randrange(n)
        l = [(i+r)%n for i in range(n)]
    return [l[cols*r:cols*(r+1)] for r in range(rows)]

def copy_board(board):
    return [row[:] for row in board]

def flatten_board(board):
    flattened = []
    for row in board:
        flattened.extend(row)
    return flattened

def prettyprint(board):
    print('\n'.join('\t'.join(map(str, row)) for row in board))

def invert_permutation(p):
    pinv = [None] * len(p)
    for (i, x) in enumerate(p):
        pinv[x] = i
    return pinv

def record_move(board, seq, move):
    apply_move_sequence(board, (move,))
    seq.append(move)

def solve_2xn(board):
    if len(board) != 2:
        raise ValueError('Board height must be 2.')
    if len(board[0]) != len(board[1]):
        raise ValueError('Board must be rectangular.')

    board = copy_board(board)
    solution = []
    m = n = len(board[0])
    segments = [(0, n)]
    while m > 1:
        # This loop body will run l := floor(log2(n)) times.

        for (a, b) in segments:
            assert b-a == m

        if m % 2 == 1:
            # If m is odd, solve the leftmost column of each segment.
            lookup = invert_permutation(flatten_board(board))
            for (a, b) in segments:
                ind_top, ind_bot = lookup[a], lookup[a+n]
                if ind_top < n > ind_bot:
                    # Both pieces are in the top row; move one to the bottom.
                    record_move(board, solution, ('c', ind_top, 1))
                elif ind_top >= n <= ind_bot:
                    # Both pieces are in the bottom row; move one to the top.
                    record_move(board, solution, ('c', ind_bot-n, 1))
                # At most one column move is used per segment.
            # Now we have one piece in the top row and one in the bottom row.
            # Sweep the bottom row m-1 cells left and insert the piece in the bottom row to the
            # top-left corner of the segment along the way.
            lookup = invert_permutation(flatten_board(board))
            inds = [[a] + sorted(lookup[a::n]) for (a, b) in segments]
            inds = [(a, i, j) for (a, i, j) in inds if not(i == j-n == a)] # skip already-done columns
            for shift in range(m):
                for (a, i, j) in inds:
                    assert j >= n
                    # We know it's in the bottom row already, so j-n is the column number.
                    if j-n-a == shift and a != i:
                        record_move(board, solution, ('c', a, 1))
                        # We test for a!=i here because it is possible that there is already a piece
                        # in the top-left corner, in which case the column move would be effectively
                        # a no-op and hence wasted.
                if shift < m-1:
                    record_move(board, solution, ('r', 1, -1))
            # Reset the bottom row.
            record_move(board, solution, ('r', 1, m-1))
            # For the segments with a != i, we have both pieces in the top row, and for the segments
            # with a == i, we still have one piece in the bottom row. In the latter case, move the
            # bottom-row piece to the top row.
            for k in range(len(inds)):
                (a, i, j) = inds[k]
                if a == i:
                    assert a != j-n
                    inds[k] = (a, j-n, None)
                    record_move(board, solution, ('c', j-n, 1))
            # The above two for-loops together use at most one column move per segment.
            # Now do the same thing for inserting pieces in the top row to the bottom-left corner.
            for shift in range(1, m):
                record_move(board, solution, ('r', 1, 1))
                for (a, i, j) in inds:
                    assert i < n
                    assert a != i
                    if i-a == shift:
                        record_move(board, solution, ('c', i, 1))
            # The above for-loop uses at most one column move per segment as well.
            # Reset the bottom row.
            record_move(board, solution, ('r', 1, 1-m))
            # Total number of column moves: <= 3 per segment.
            # Total number of row moves: 4(m-1).
            for (a, b) in segments:
                assert {board[0][a], board[1][a]} == {a, a+n}
            m -= 1
            segments = [(a+1, b) for (a, b) in segments]

        # Now m is even.
        assert m % 2 == 0
        m2 = m // 2

        left_count = sum((x%n-a < m2) for (a, b) in segments for x in board[0][a:b])
        right_count = m*len(segments) - left_count
        #print(m, len(segments), left_count, right_count)

        is_left = [False] * (2*n)
        for (a, b) in segments:
            for i in range(a, a + m2):
                is_left[i] = is_left[i+n] = True
                is_left[i+m2] = is_left[i+m2+n] = False
        assert sum(is_left) == len(segments)*m

        if left_count >= right_count:
            # Move the bottom row m-1 cells to the left, then sweep 2(m-1) cells to the right and
            # insert left pieces to the top row along the way.
            # This uses at most m/2 column moves and exactly 3(m-1) row moves.
            # Since every location in the bottom row gets matched with every location in the top
            # row, this is guaranteed to bring every left piece to the top row, and the top row
            # will be completely filled with left pieces.
            record_move(board, solution, ('r', 1, 1-m))
            for shift in range(1-m, m):
                for (a, b) in segments:
                    r = range(a, b+shift) if shift < 0 else range(a+shift, b)
                    for i in r:
                        if is_left[board[1][i]] and not is_left[board[0][i]]:
                            record_move(board, solution, ('c', i, 1))
                if shift < m-1:
                    record_move(board, solution, ('r', 1, 1))
            # Now shift the bottom row to be m/2 cells to the right.
            # (i.e. shifting left by m/2-1 cells)
            record_move(board, solution, ('r', 1, 1-m2))
            # Shift every column in the right half of each segment.
            for (a, b) in segments:
                for i in range(a+m2, b):
                    record_move(board, solution, ('c', i, 1))
            # Reset the bottom row.
            record_move(board, solution, ('r', 1, -m2))
            # This uses exactly m/2 column moves and m-1 row moves.
        else:
            # Same thing as above, but mirrored.
            record_move(board, solution, ('r', 1, m-1))
            for shift in range(m-1, -m, -1):
                for (a, b) in segments:
                    r = range(a, b+shift) if shift < 0 else range(a+shift, b)
                    for i in r:
                        if is_left[board[0][i]] and not is_left[board[1][i]]:
                            record_move(board, solution, ('c', i, 1))
                if shift > 1-m:
                    record_move(board, solution, ('r', 1, -1))
            record_move(board, solution, ('r', 1, m2-1))
            # Shift every column in the left half of each segment.
            for (a, b) in segments:
                for i in range(a, a+m2):
                    record_move(board, solution, ('c', i, 1))
            # Reset the bottom row.
            record_move(board, solution, ('r', 1, m2))


        # Total number of column moves: <= m per segment <= n.
        # Total number of row moves: 4(m-1).

        # Finally, shrink the segments and repeat.
        m = m2
        segments = list(itertools.chain.from_iterable(((a, a+m2), (a+m2, b)) for (a, b) in segments))

    # Bounds for sorting the board into columns
    # Total number of column moves: <= ln + 3n
    # Total number of row moves: <= 4(2n-3 + 2floor(n/2)-3 + ... + 2floor(n/2^(l-1))-3)
    #                            <= 4(4n - 3l) = 16n - 12l

    # Fix the columns that need fixing.
    for i in range(n):
        assert {board[0][i], board[1][i]} == {i, i+n}
        if board[1][i] == i:
            record_move(board, solution, ('c', i, 1))

    # Total number of moves: <= ln + 20 n - 12 l = O(n log n) = O(N log N)
    return solution
	"""
	Loopover solver for arbitrary sizes; asymptotically optimal in STM up to log factors.

	Let N be the total number of cells.

	This solver uses Shell sort with 3-smooth gaps to sort all of the pieces. For a specific gap, the
	swaps performed are all "disjoint" and we apply O(N) setup moves so that all the swaps can be
	performed in O(N) moves. Since there are only O(log^2 N) gaps, this leads to an O(N log^2 N) solving
	algorithm.

	There are some technical details relating to permutation parity, which makes the naive idea not
	quite work, but these can be worked around.
	"""

	import itertools
	import math
	import random

	def remainder(x, y):
	fudge = y - y//2 - 1
	return (x + fudge) % y - fudge

	def move_row(board, row_index, amount):
	amount %= len(board[0])
	if amount == 0:
	return
	board[row_index] = board[row_index][-amount:] + board[row_index][:-amount]

	def move_col(board, col_index, amount):
	amount %= len(board)
	if amount == 0:
	return
	col = [row[col_index] for row in board]
	col = col[-amount:] + col[:-amount]
	for (row, x) in zip(board, col):
	row[col_index] = x

	def invert_move_sequence(seq):
	return tuple((dir, index, -amount) for (dir, index, amount) in seq[::-1])

	def apply_move_sequence(board, seq):
	move_funs = {'r': move_row, 'c': move_col}
	for (dir, index, amount) in seq:
	move_funs[dir](board, index, amount)

	def normalise_move_sequence(rows, cols, seq):
	return tuple(filter(lambda x: x[2] != 0, ((d, i, remainder(a, rows if d == 'c' else cols)) for (d, i, a) in seq)))

	def transpose_move_sequence(seq):
	switch = {'r': 'c', 'c': 'r'}
	return tuple((switch[d], i, a) for (d, i, a) in seq)

	def len_move_sequence(seq):
	return sum(abs(a) for (d, i, a) in seq if a != 0)

	def create_board(rows, cols):
	# note that rows = height, cols = width
	return [[c + cols*r for c in range(cols)] for r in range(rows)]

	def create_random_board(rows, cols, random=random):
	if rows > 1 or cols > 1:
	l = list(range(rows * cols))
	random.shuffle(l)
	if rows % 2 == cols % 2 == 1 and permutation_parity(l) == 1:
	l[0], l[1] = l[1], l[0]
	else:
	n = rows * cols
	r = random.randrange(n)
	l = [(i+r)%n for i in range(n)]
	return [l[colsr:cols(r+1)] for r in range(rows)]

	def copy_board(board):
	return [row[:] for row in board]

	def flatten_board(board):
	flattened = []
	for row in board:
	flattened.extend(row)
	return flattened

	def prettyprint(board):
	print('\n'.join('\t'.join(map(str, row)) for row in board))

	def decompose_inverse_permutation(perm):
	"""
	Given a permutation (given as a list of integers), decompose its inverse into a product of
	involutions $\pi^{-1} = \pi_0 \pi_1 \pi_2 \dots$ (given as a list of lists of swaps) such that
	each $\pi_i$ has "uniform-distance" swaps, i.e. it is of the form
	$\pi_i = (a_0, a_0+d) (a_1, a_1+d) \dots$

	(We can obtain a decomposition for the input permutation $\pi$ itself by reversing the
	decomposition, since the decomposition consists solely of transpositions. However, we have no
	use for this.)

	This is currently done with Shellsort with 3-smooth gaps; see:
	http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/shell/shellen.htm
	Odd-even merge sort would also work, and I believe bitonic merge sort (in its bitonic,
	non-monotonised form) should also work, but those are more complicated and only get us (maybe)
	a constant factor improvement in the number of swaps.
	"""

	if len(perm) == 1:
	return []

	perm = list(perm) # make a copy that we can mess around with
	n = len(perm)
	decomposition = []

	gap = 1
	# determine the initial gap
	while 3*gap < n:
	gap *= 3
	if 2*gap < n:
	gap *= 2

	# loop over the 3-smooth numbers
	while True:
	stage = []
	# while the comparisons with 3-smooth Shell sort aren't disjoint, the resulting swaps
	# always are because an element can't move more than once for each gap.
	for i in range(n-gap):
	if perm[i] > perm[i+gap]:
	perm[i], perm[i+gap] = perm[i+gap], perm[i]
	stage.append((i, i+gap))
	if stage:
	decomposition.append(tuple(stage))

	# if the gap is 1, we're done
	if gap == 1:
	break
	# if not, update the gap
	elif gap % 2 == 0:
	gap //= 2
	else:
	gap = gap // 3
	while 2*gap < n:
	gap *= 2
	# note that the gaps are not monotonically decreasing; the gap sequence we use is
	# the reverse of:
	# 1, 2, 4, 8, ..., 3, 6, 12, 24, ..., 9, 18, 36, 72, ..., ...
	# this still works for Shell sort because every time we hit a gap g, we've already sorted by
	# 2g and by 3g.

	return tuple(decomposition)

	def permutation_parity(perm):
	return sum(map(len, decompose_inverse_permutation(perm))) % 2

	"""
	These solving functions are in-place and return the move sequence used.

	We assume the dimensions are at least 2 on each side. Silly/weird things can happen on 1xn or nx1
	boards, so don't do that.
	"""
	def solve_piece_in_first_row(board, index):
	"""
	Solve a piece that belongs in the first row.

	Constraints:
	- index < cols
	- all pieces before index in the first row are already solved

	Maximum move count:
	floor(rows/2) + floor(cols/2) + 1
	"""
	rows, cols = len(board), len(board[0])
	((r, c), ) = ((r, c) for (r, row) in enumerate(board) for (c, x) in enumerate(row) if x == index)
	if r == 0 and c == index:
	return ()
	if index == 0:
	# first piece; don't need to care about preserving anything
	move_row(board, r, -c)
	move_col(board, 0, -r)
	if r == 0:
	return (('r', r, -c), )
	if c == 0:
	return (('c', 0, -r), )
	return (('r', r, -c), ('c', 0, -r))

	sol = []
	if r == 0:
	# not the first piece, and it's stuck in the first row at the wrong position
	move_col(board, c, 1)
	sol.append(('c', c, 1))
	r += 1

	# slide the piece to the correct column
	if c != index:
	move_row(board, r, index-c)
	sol.append(('r', r, index-c))
	# then slide it to the top row
	move_col(board, index, -r)
	sol.append(('c', index, -r))
	return tuple(sol)

	def solve_phase1(board):
	"""
	Solve the first two pieces of the first row and parity.

	Maximum move count:
	rows + cols + 2
	"""
	solution = solve_piece_in_first_row(board, 0)
	solution += solve_piece_in_first_row(board, 1)
	if permutation_parity(flatten_board(board)) == 1:
	# if we have odd parity, at least one of the two dimensions must be even
	# (otherwise every board state would have even parity)
	rows = len(board)
	cols = len(board[0])
	if cols % 2 == 0:
	solution += (('r', 1, 1), )
	move_row(board, 1, 1)
	else:
	# if cols is odd, then it must be at least 3 so moving column 2 won't affect the two
	# pieces we just solved.
	solution += (('c', 2, 1), )
	move_col(board, 2, 1)
	return solution

	def solve_phase2(board):
	"""
	Solve the rest of the pieces once the first two pieces and parity are solved.

	Maximum move count:
	<= 19.25/((log 2)(log 3)) * N * log(N)^2 * (1+o(1)) if rows = O(1) or cols = O(1)
	or 11/ ... if rows, cols = o(N)
	"""
	rows, cols = len(board), len(board[0])
	stages = decompose_inverse_permutation(flatten_board(board))
	sol = []
	for stage in stages:
	diff = stage[0][1]-stage[0][0] # the gap (must be the same for all stage[i][1]-stage[i][0])
	stage_coords = tuple((divmod(a, cols), divmod(b, cols)) for (a, b) in stage)
	swaps_nooverflow = tuple((a, b) for (a, b) in stage_coords if b[0]-a[0] == diff // cols)
	swaps_overflow = tuple((a, b) for (a, b) in stage_coords if b[0]-a[0] == diff // cols + 1)
	assert len(swaps_nooverflow) + len(swaps_overflow) == len(stage_coords)

	seq = get_uniform_swaps_move_sequence(rows, cols, swaps_nooverflow)
	apply_move_sequence(board, seq)
	sol.extend(seq)

	seq = get_uniform_swaps_move_sequence(rows, cols, swaps_overflow)
	apply_move_sequence(board, seq)
	sol.extend(seq)
	return tuple(sol)

	def solve(board):
	rows, cols = len(board), len(board[0])
	return normalise_move_sequence(rows, cols, solve_phase1(board) + solve_phase2(board))

	def get_3cycle_move_sequence(rows, cols, a, b, c):
	"""
	Apply a 3-cycle moving the piece at position a to position b to position c to position a. The
	positions are specified as (row, column) coordinates.

	Maximum move count:
	\|\|a-b\|\| + \|\|b-c\|\| + \|\|c-a\|\| + 2 for non-general-position
	<= 3/2 (\|\|a-b\|\| + \|\|b-c\|\| + \|\|c-a\|\|)
	<= 3/2 (rows + cols)

	3/2 (\|\|a-b\|\| + \|\|b-c\|\| + \|\|c-a\|\|) for general position
	<= 3/2 (rows + cols)
	"""
	if a[0] == b[0] == c[0]:
	# all in the same row; move one piece into a separate row
	return (('c', c[1], 1), ) + get_3cycle_move_sequence_row_interchange(a, b, ((c[0]+1) % rows, c[1])) + (('c', c[1], -1), )

	# check if two points are in the same row; if so, then cycle the points so that the first two
	# are in the same row
	if a[0] == b[0]:
	return get_3cycle_move_sequence_row_interchange(a, b, c)
	if b[0] == c[0]:
	return get_3cycle_move_sequence_row_interchange(b, c, a)
	if c[0] == a[0]:
	return get_3cycle_move_sequence_row_interchange(c, a, b)

	if a[1] == b[1] or b[1] == c[1] or c[1] == a[1]:
	# two pieces in the same column; transpose and call into the above
	return transpose_move_sequence(get_3cycle_move_sequence(cols, rows, a[::-1], b[::-1], c[::-1]))

	# otherwise, the pieces are "in general position" (all rows distinct, all columns distinct)
	# if the sum of vertical distances is smaller than the sum of horizontal distances, move b to
	# the same row as a, then do a standard 3-cycle; otherwise, transpose first, then do that.
	sum_rowdist = sum(abs(remainder(x, rows)) for x in (a[0]-b[0], b[0]-c[0], c[0]-a[0]))
	sum_coldist = sum(abs(remainder(x, cols)) for x in (a[1]-b[1], b[1]-c[1], c[1]-a[1]))
	if sum_rowdist <= sum_coldist:
	return (('c', b[1], a[0]-b[0]), ) + get_3cycle_move_sequence_row_interchange(a, (a[0], b[1]), c) + (('c', b[1], b[0]-a[0]), )
	return (('r', b[0], a[1]-b[1]), ) + transpose_move_sequence(get_3cycle_move_sequence_row_interchange(a[::-1], (a[1], b[0]), c[::-1])) + (('r', b[0], b[1]-a[1]), )

	def get_3cycle_move_sequence_row_interchange(a, b, c):
	"""
	Assume a[0] == b[0] != c[0].

	Maximum move count:
	\|\|a-b\|\| + \|\|b-c\|\| + \|\|c-a\|\|
	<= rows + cols
	(since the sum of horizontal distances <= cols and likewise in the other direction)
	"""
	seq = (
	('r', a[0], c[1]-a[1]), # move A to same col as C
	('c', c[1], a[0]-c[0]), # move C to take A's place
	('r', a[0], a[1]-b[1]), # move B to where C is now
	('c', c[1], c[0]-a[0]), # move B to C's initial position
	('r', a[0], b[1]-c[1]), # move A and C to B's and A's initial positions
	)
	return tuple((d, i, a) for (d, i, a) in seq if a != 0)

	def check_3cycles(rows=5, cols=6):
	coordinates = ((r, c) for r in range(rows) for c in range(cols))
	for (a, b, c) in itertools.permutations(coordinates, 3):
	board = [[(r, c) for c in range(cols)] for r in range(rows)]
	seq = get_3cycle_move_sequence(rows, cols, a, b, c)
	apply_move_sequence(board, seq)
	if board[a[0]][a[1]] != c or board[b[0]][b[1]] != a or board[c[0]][c[1]] != b:
	prettyprint(board)
	print(a, b, c)
	return False
	return True

	def get_2_2cycle_move_sequence(rows, cols, a, b, c, d):
	"""
	Swap a <-> b and c <-> d by using two 3-cycles.
	Maximum move count:
	3/2 (\|\|a-b\|\| + \|\|c-d\|\| + 2\|\|a-c\|\| + \|\|b-c\|\| + \|\|a-d\|\|)
	<= 3\|\|a-b\|\| + 3\|\|c-d\|\| + 6\|\|a-c\|\|
	"""
	return get_3cycle_move_sequence(rows, cols, a, b, c) + get_3cycle_move_sequence(rows, cols, a, d, c)

	def check_2_2cycles(rows=5, cols=5):
	coordinates = ((r, c) for r in range(rows) for c in range(cols))
	for (a, b, c, d) in itertools.permutations(coordinates, 4):
	board = [[(r, c) for c in range(cols)] for r in range(rows)]
	seq = get_2_2cycle_move_sequence(rows, cols, a, b, c, d)
	apply_move_sequence(board, seq)
	if board[a[0]][a[1]] != b or board[b[0]][b[1]] != a or board[c[0]][c[1]] != d or board[d[0]][d[1]] != c:
	prettyprint(board)
	print(a, b, c, d)
	return False
	return True

	def get_uniform_swaps_move_sequence(rows, cols, swaps):
	"""
	Assume the displacement for each swap is equal.

	If there is an even number of swaps, we pair them up and apply them one by one.

	If there is an odd number of swaps, we pair up the first swap with ((0,0), (0,1)), and pair up
	the rest as usual.

	Naively applying the swaps leads to O(max(rows,cols)) moves per swap and O(max(rows,cols)*N) for
	the whole thing, so we use O(N) setup moves to force most swaps (except possibly the first one)
	to be of displacement (1, 1) (i.e. each swap is of a piece and the one to its bottom-right). As
	long as we pair up the swaps in a reasonable manner (e.g. raster scan (= lexicographic order) or
	serpentine scan), we can do all of the swaps in O(N) moves.

	If rows = cols, then the shearing procedure used here will convert every swap to a (1,1) swap,
	but in general there can be either O(rows) swaps of distance O(cols), or O(cols) swaps of
	distance O(rows), because of boundary-related weirdness. We handle all of the (1,1) swaps first,
	then repeat the shearing procedure to convert the stragglers to (1,1) swaps as well.

	(We could also do minimum-weight matchings, but the algorithms for that are kinda non-trivial
	and it's not super important anyway.)

	Maximum move count:

	Initial swap to handle parity:
	At most 4 + 4.5*(rows+cols) moves.

	The swaps that get set up to (1,1) displacement:
	At most N/4 pairs of swaps, each taking at most 12 + 6*dist moves where sum(dist) <= N.
	This gives (N/4) * 12 + 6 * N <= 9 * N.

	The remaining swaps:
	All such swaps are either in the leftmost and rightmost columns, or in the top and bottom rows.
	This leaves at most max(rows, cols)/2 pairs of swaps, each taking at most 12 + 6*dist' moves
	where sum(dist') <= max(rows, cols). This gives max(rows, cols)/2 * 12 + 6 * max(rows, cols)
	<= 12 * max(rows, cols).

	Setup moves:
	Each row is moved by at most cols/2 and each column is moved by at most rows/2, for a total of N
	moves. (The setup moves also have to be undone at the end, so this really contributes 2*N.)

	Adding everything together: 3 + 4.5(rows+cols) + 9N + 12max(rows, cols) + 2N
	= 3 + 4.5rows + 4.5cols + 12max(rows, cols) + 11N
	"""
	if len(swaps) == 0:
	return ()
	seq = []
	if len(swaps) % 2 == 1:
	# if we have an odd number of swaps, combine the first swap with a swap of (0,1);
	# this is not necessarily uniform, but it doesn't matter because we do this at most once.
	# this costs <= 3 + 4.5rows + 4.5cols moves.
	seq.extend(get_2_2cycle_move_sequence(rows, cols, (0, 0), (0, 1), *swaps[0]))
	swaps = swaps[1:]
	if len(swaps) == 0:
	return tuple(seq)

	# the displacement of each swap
	dr, dc = swaps[0][1][0]-swaps[0][0][0], swaps[0][1][1]-swaps[0][0][1]
	#print(dr, dc)

	setup_moves = []

	swaps = [(list(a), list(b)) for (a, b) in swaps]
	if dr != 0:
	# the swaps are not purely horizontal
	if dc != 1:
	block_size = abs(dr)
	adjust = 1 - dc
	# shear the whole board along blocks of rows to make it so that the horizontal
	# displacement is +1 for every swap (without affecting vertical displacement)
	for row_base in range(0, rows, block_size):
	shift_amount = ((row_base // block_size) * adjust) % cols
	for row_index in range(row_base, min(rows, row_base+block_size)):
	setup_moves.append(('r', row_index, shift_amount))
	for swap in swaps:
	for coords in swap:
	coords[1] += (coords[0] // block_size) * adjust
	coords[1] %= cols
	assert (swap[1][1] - swap[0][1]) % cols == 1
	if dr != 1:
	# then shear along columns to make it so that the vertical displacement is +1, except
	# possibly for the swaps that wrap around horizontally.
	for col_index in range(cols):
	setup_moves.append(('c', col_index, col_index*(1-dr)))
	for swap in swaps:
	for coords in swap:
	coords[0] += coords[1]*(1-dr)
	coords[0] %= rows
	if swap[1][1] != 0:
	# check if the swap straddles the left/right border; if it does, then the
	# following assert can fail so we ignore that.
	assert (swap[1][0] - swap[0][0]) % rows == 1
	else:
	# otherwise, the swaps are horizontal; essentially just do the above but transposed
	block_size = abs(dc)
	adjust = 1 # 1-dr = 1 since dr = 0
	# shear along blocks of columns
	for col_base in range(0, cols, block_size):
	shift_amount = ((col_base // block_size) * adjust) % rows
	for col_index in range(col_base, min(col_base+block_size, cols)):
	setup_moves.append(('c', col_index, shift_amount))
	for swap in swaps:
	for coords in swap:
	coords[0] += (coords[1] // block_size) * adjust
	coords[0] %= rows
	assert (swap[1][0] - swap[0][0]) % rows == 1
	if dc != 1:
	# then shear along rows
	for row_index in range(rows):
	setup_moves.append(('r', row_index, row_index*(1-dc)))
	for swap in swaps:
	for coords in swap:
	coords[1] += coords[0]*(1-dc)
	coords[1] %= cols
	if swap[1][0] != 0:
	assert (swap[1][1] - swap[0][1]) % cols == 1
	seq.extend(setup_moves)
	#print(setup_moves)

	swaps_11 = [(a, b) for (a, b) in swaps if (b[0]-a[0])%rows == (b[1]-a[1])%cols == 1]
	swaps_rest = [(a, b) for (a, b) in swaps if not ((b[0]-a[0])%rows == (b[1]-a[1])%cols == 1)]
	if len(swaps_11) % 2 == 1:
	# if we have an odd number of exceptional swaps (which also means an odd number of (1,1)
	# swaps), then we pull one out from each list and apply those swaps first, then pair up
	# the rest.
	# this costs <= 8 + 3*(rows+cols)
	seq.extend(get_2_2cycle_move_sequence(rows, cols, *(swaps_11[-1] + swaps_rest[-1])))
	swaps_11 = swaps_11[:-1]
	swaps_rest = swaps_rest[:-1]

	# the shearing might leave the swaps in a weird order, so we just sort them to ensure that when
	# we pair up the swaps, they're "not too far apart". while we can't bound the distance between
	# the swaps in each pair, we can bound the sum of those distances if we use serpentine scan.
	swaps_11.sort(key=lambda swap: (swap[0][0], swap[0][1](-1)*swap[0][0]))

	# actually do the swaps (in pairs, because we can't just do one swap with only 3-cycles)
	for (swap0, swap1) in zip(swaps_11[0::2], swaps_11[1::2]):
	seq.extend(get_2_2cycle_move_sequence(rows, cols, *(swap0 + swap1)))

	if len(swaps_rest) > 0:
	#print(swaps_rest)
	# there should be only one possible displacement among the exceptional swaps
	assert len(set(((b[0]-a[0])%rows, (b[1]-a[1])%cols) for (a, b) in swaps_rest)) == 1
	dr_ex = (swaps_rest[0][1][0]-swaps_rest[0][0][0])%rows
	dc_ex = (swaps_rest[0][1][1]-swaps_rest[0][0][1])%cols
	if dr_ex == 1:
	# shear horizontally to bring horizontal displacement to +1
	assert len(set(b[0] for (a, b) in swaps_rest)) == 1 # check that they're all in the same row
	row_index = swaps_rest[0][1][0]
	move = ('r', row_index, 1-dc_ex)
	seq.append(move)
	setup_moves.append(move)
	else:
	# dc_ex = 1
	# shear vertically to bring vertical displacement to +1
	assert len(set(b[1] for (a, b) in swaps_rest)) == 1 # check that they're all in the same column
	col_index = swaps_rest[0][1][1]
	move = ('c', col_index, 1-dr_ex)
	seq.append(move)
	setup_moves.append(move)
	# this setup takes at most max(rows/2,cols/2) moves
	swaps_rest.sort()
	for (swap0, swap1) in zip(swaps_rest[0::2], swaps_rest[1::2]):
	swap0[1][0] = (swap0[0][0] + 1) % rows
	swap0[1][1] = (swap0[0][1] + 1) % cols
	swap1[1][0] = (swap1[0][0] + 1) % rows
	swap1[1][1] = (swap1[0][1] + 1) % cols
	seq.extend(get_2_2cycle_move_sequence(rows, cols, *(swap0 + swap1)))
	# each pair of swaps here takes at most 8+4*dist where sum(dist) < max(rows,cols)

	# then undo the setup moves
	seq.extend(invert_move_sequence(setup_moves))

	return tuple(seq)
	"""
	Loopover solver for 2 × n (2 rows, n columns).

	Uses O(n log n) moves with a divide-and-conquer strategy: we sort the pieces into the correct
	columns a la quicksort. Shift all pieces that belong in the left half to the left and all pieces
	that belong in the right half to the right, then repeat this on the smaller segments. This
	eventually results in every piece being in the correct column, at which point we may apply column
	moves to fix the columns that need fixing, and we're done.

	We place some restrictions on the moves we will be making, to prevent pieces from one segment from
	mixing with another:
	1. The top row never moves.
	2. The bottom row is allowed to move, but we will ignore the horizontal looping completely.
	3. More precisely, we will track the amount the bottom row has moved as an integer in the range
	[-n+1, n-1] (inclusive on both ends, positive = shifted right), and only allow column moves in
	columns c where both c and c+shift are in-bounds.
	If shift > 0, this means shift <= c <= n-1; if shift < 0, this means 0 <= c <= n-1+shift.
	"""

	import itertools
	import math
	import random

	def remainder(x, y):
	fudge = y - y//2 - 1
	return (x + fudge) % y - fudge

	def move_row(board, row_index, amount):
	amount %= len(board[0])
	if amount == 0:
	return
	board[row_index] = board[row_index][-amount:] + board[row_index][:-amount]

	def move_col(board, col_index, amount):
	amount %= len(board)
	if amount == 0:
	return
	col = [row[col_index] for row in board]
	col = col[-amount:] + col[:-amount]
	for (row, x) in zip(board, col):
	row[col_index] = x

	def invert_move_sequence(seq):
	return tuple((dir, index, -amount) for (dir, index, amount) in seq[::-1])

	def apply_move_sequence(board, seq):
	move_funs = {'r': move_row, 'c': move_col}
	for (dir, index, amount) in seq:
	move_funs[dir](board, index, amount)

	def normalise_move_sequence(rows, cols, seq):
	return tuple(filter(lambda x: x[2] != 0, ((d, i, remainder(a, rows if d == 'c' else cols)) for (d, i, a) in seq)))

	def transpose_move_sequence(seq):
	switch = {'r': 'c', 'c': 'r'}
	return tuple((switch[d], i, a) for (d, i, a) in seq)

	def len_move_sequence(seq):
	return sum(abs(a) for (d, i, a) in seq if a != 0)

	def create_board(rows, cols):
	# note that rows = height, cols = width
	return [[c + cols*r for c in range(cols)] for r in range(rows)]

	def create_random_board(rows, cols, random=random):
	if rows > 1 or cols > 1:
	l = list(range(rows * cols))
	random.shuffle(l)
	if rows % 2 == cols % 2 == 1 and permutation_parity(l) == 1:
	l[0], l[1] = l[1], l[0]
	else:
	n = rows * cols
	r = random.randrange(n)
	l = [(i+r)%n for i in range(n)]
	return [l[colsr:cols(r+1)] for r in range(rows)]

	def copy_board(board):
	return [row[:] for row in board]

	def flatten_board(board):
	flattened = []
	for row in board:
	flattened.extend(row)
	return flattened

	def prettyprint(board):
	print('\n'.join('\t'.join(map(str, row)) for row in board))

	def invert_permutation(p):
	pinv = [None] * len(p)
	for (i, x) in enumerate(p):
	pinv[x] = i
	return pinv

	def record_move(board, seq, move):
	apply_move_sequence(board, (move,))
	seq.append(move)

	def solve_2xn(board):
	if len(board) != 2:
	raise ValueError('Board height must be 2.')
	if len(board[0]) != len(board[1]):
	raise ValueError('Board must be rectangular.')

	board = copy_board(board)
	solution = []
	m = n = len(board[0])
	segments = [(0, n)]
	while m > 1:
	# This loop body will run l := floor(log2(n)) times.

	for (a, b) in segments:
	assert b-a == m

	if m % 2 == 1:
	# If m is odd, solve the leftmost column of each segment.
	lookup = invert_permutation(flatten_board(board))
	for (a, b) in segments:
	ind_top, ind_bot = lookup[a], lookup[a+n]
	if ind_top < n > ind_bot:
	# Both pieces are in the top row; move one to the bottom.
	record_move(board, solution, ('c', ind_top, 1))
	elif ind_top >= n <= ind_bot:
	# Both pieces are in the bottom row; move one to the top.
	record_move(board, solution, ('c', ind_bot-n, 1))
	# At most one column move is used per segment.
	# Now we have one piece in the top row and one in the bottom row.
	# Sweep the bottom row m-1 cells left and insert the piece in the bottom row to the
	# top-left corner of the segment along the way.
	lookup = invert_permutation(flatten_board(board))
	inds = [[a] + sorted(lookup[a::n]) for (a, b) in segments]
	inds = [(a, i, j) for (a, i, j) in inds if not(i == j-n == a)] # skip already-done columns
	for shift in range(m):
	for (a, i, j) in inds:
	assert j >= n
	# We know it's in the bottom row already, so j-n is the column number.
	if j-n-a == shift and a != i:
	record_move(board, solution, ('c', a, 1))
	# We test for a!=i here because it is possible that there is already a piece
	# in the top-left corner, in which case the column move would be effectively
	# a no-op and hence wasted.
	if shift < m-1:
	record_move(board, solution, ('r', 1, -1))
	# Reset the bottom row.
	record_move(board, solution, ('r', 1, m-1))
	# For the segments with a != i, we have both pieces in the top row, and for the segments
	# with a == i, we still have one piece in the bottom row. In the latter case, move the
	# bottom-row piece to the top row.
	for k in range(len(inds)):
	(a, i, j) = inds[k]
	if a == i:
	assert a != j-n
	inds[k] = (a, j-n, None)
	record_move(board, solution, ('c', j-n, 1))
	# The above two for-loops together use at most one column move per segment.
	# Now do the same thing for inserting pieces in the top row to the bottom-left corner.
	for shift in range(1, m):
	record_move(board, solution, ('r', 1, 1))
	for (a, i, j) in inds:
	assert i < n
	assert a != i
	if i-a == shift:
	record_move(board, solution, ('c', i, 1))
	# The above for-loop uses at most one column move per segment as well.
	# Reset the bottom row.
	record_move(board, solution, ('r', 1, 1-m))
	# Total number of column moves: <= 3 per segment.
	# Total number of row moves: 4(m-1).
	for (a, b) in segments:
	assert {board[0][a], board[1][a]} == {a, a+n}
	m -= 1
	segments = [(a+1, b) for (a, b) in segments]

	# Now m is even.
	assert m % 2 == 0
	m2 = m // 2

	left_count = sum((x%n-a < m2) for (a, b) in segments for x in board[0][a:b])
	right_count = m*len(segments) - left_count
	#print(m, len(segments), left_count, right_count)

	is_left = [False] * (2*n)
	for (a, b) in segments:
	for i in range(a, a + m2):
	is_left[i] = is_left[i+n] = True
	is_left[i+m2] = is_left[i+m2+n] = False
	assert sum(is_left) == len(segments)*m

	if left_count >= right_count:
	# Move the bottom row m-1 cells to the left, then sweep 2(m-1) cells to the right and
	# insert left pieces to the top row along the way.
	# This uses at most m/2 column moves and exactly 3(m-1) row moves.
	# Since every location in the bottom row gets matched with every location in the top
	# row, this is guaranteed to bring every left piece to the top row, and the top row
	# will be completely filled with left pieces.
	record_move(board, solution, ('r', 1, 1-m))
	for shift in range(1-m, m):
	for (a, b) in segments:
	r = range(a, b+shift) if shift < 0 else range(a+shift, b)
	for i in r:
	if is_left[board[1][i]] and not is_left[board[0][i]]:
	record_move(board, solution, ('c', i, 1))
	if shift < m-1:
	record_move(board, solution, ('r', 1, 1))
	# Now shift the bottom row to be m/2 cells to the right.
	# (i.e. shifting left by m/2-1 cells)
	record_move(board, solution, ('r', 1, 1-m2))
	# Shift every column in the right half of each segment.
	for (a, b) in segments:
	for i in range(a+m2, b):
	record_move(board, solution, ('c', i, 1))
	# Reset the bottom row.
	record_move(board, solution, ('r', 1, -m2))
	# This uses exactly m/2 column moves and m-1 row moves.
	else:
	# Same thing as above, but mirrored.
	record_move(board, solution, ('r', 1, m-1))
	for shift in range(m-1, -m, -1):
	for (a, b) in segments:
	r = range(a, b+shift) if shift < 0 else range(a+shift, b)
	for i in r:
	if is_left[board[0][i]] and not is_left[board[1][i]]:
	record_move(board, solution, ('c', i, 1))
	if shift > 1-m:
	record_move(board, solution, ('r', 1, -1))
	record_move(board, solution, ('r', 1, m2-1))
	# Shift every column in the left half of each segment.
	for (a, b) in segments:
	for i in range(a, a+m2):
	record_move(board, solution, ('c', i, 1))
	# Reset the bottom row.
	record_move(board, solution, ('r', 1, m2))


	# Total number of column moves: <= m per segment <= n.
	# Total number of row moves: 4(m-1).

	# Finally, shrink the segments and repeat.
	m = m2
	segments = list(itertools.chain.from_iterable(((a, a+m2), (a+m2, b)) for (a, b) in segments))

	# Bounds for sorting the board into columns
	# Total number of column moves: <= ln + 3n
	# Total number of row moves: <= 4(2n-3 + 2floor(n/2)-3 + ... + 2floor(n/2^(l-1))-3)
	# <= 4(4n - 3l) = 16n - 12l

	# Fix the columns that need fixing.
	for i in range(n):
	assert {board[0][i], board[1][i]} == {i, i+n}
	if board[1][i] == i:
	record_move(board, solution, ('c', i, 1))

	# Total number of moves: <= ln + 20 n - 12 l = O(n log n) = O(N log N)
	return solution