/*
 * Binary matrix classes for different needs
 * Row matrices are more suited to matrix operations, especially inversion.
 * Column matrices are better for matrix-vector multiplication;
 * that is, actual sample generation.
 *
 */

#ifndef MATRIX_H
#define MATRIX_H

#include "common.h"
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <vector>
#include <string>
#include <x86intrin.h>


// =============================================================================
// A small base class to handle common routines
// =============================================================================

// CRTP base: Matrix must provide public getBit(i, j) and getm(); all printing
// routines below are written only in terms of those two accessors.
template <typename Matrix>
class MatrixCommon {
public:
    // -------------------------------------------------------------------------
    // Print a single row: one SYMBOLS character per column, no trailing newline
    // (SYMBOLS is not defined in this file; presumably it comes from common.h)
    // -------------------------------------------------------------------------
    void printRow(int i, FILE *file) const {
        for (int j = 0; j < getm(); j++) {
            fprintf(file, "%c", SYMBOLS[ getBit(i, j) ]);
        }
    }
    // -------------------------------------------------------------------------
    // Print whole matrix, one row per line, preceded by txt on its own line
    // (pass NULL as txt to suppress the title line)
    // -------------------------------------------------------------------------
    void printf(FILE *file = stdout, const char *txt = "") const {
        if (txt) fprintf(file, "%s\n", txt);
        for (int i = 0; i < getm(); i++) {
            printRow(i, file);
            fprintf(file, "\n");
        }
    }
    // -------------------------------------------------------------------------
    // Print with a list of other matrices, side-by-side.
    // The others are printed first and this matrix last, which is the order the
    // original insertion at the front of the list produced.
    // -------------------------------------------------------------------------
    void printSideBySide(
        const std::vector<Matrix> &others,
        FILE *file,
        const char *header,
        int indentation
    ) const {
        std::vector<Matrix> tuple(others);
        tuple.push_back(static_cast<const Matrix&>(*this));                     // *this is only the base sub-object; downcast to copy the full matrix
        MatrixCommon::printf(tuple, file, header, indentation);
    }
    // -------------------------------------------------------------------------
    // Print a list of matrices side-by-side, separated by single spaces.
    // The number of printed rows is taken from the first matrix in the list;
    // an empty list prints only the header.
    // -------------------------------------------------------------------------
    static void printf(
        const std::vector<Matrix> &list,
        FILE *file = stdout,
        const char *header = "",
        int indentation = 0
    ) {
        if (header) fprintf(file, "%s\n", header);
        if (list.empty()) return;                                               // Nothing to print, and list[0] would be out of bounds
        const int rowCount = list[0].getm();
        for (int i = 0; i < rowCount; i++) {
            for (int k = 0; k < indentation; k++) fprintf(file, " ");           // Here we prefer simplicity and clarity to efficiency
            for (size_t j = 0; j < list.size(); j++) {
                list[j].printRow(i, file);
                if (j + 1 < list.size()) fprintf(file, " ");                    // Separator between matrices, none after the last one
            }
            fprintf(file, "\n");
        }
    }
    // -------------------------------------------------------------------------
    // Print as a 32-bit C array (columns, as fixed point fractions):
    // row 0 of each column goes to bit 31, row 1 to bit 30, and so on.
    // Only meaningful for m <= 32, since row indices above 31 have no bit.
    // -------------------------------------------------------------------------
    void printC32(FILE *file = stdout, const char *txt = "") const {
        if (txt) fprintf(file, "%s\n", txt);
        const int m = getm();
        for (int colIndex = 0; colIndex < m; colIndex++) {
            if (colIndex % 4) { fprintf(file,   "    "); }
            else              { fprintf(file, "\n    ");}                       // Start a new line every four entries
            uint32_t v(0);
            for (int rowIndex = 0; rowIndex < m; rowIndex++) {
                v |= static_cast<uint32_t>(getBit(rowIndex, colIndex))          // Shift as unsigned: a signed 1 << 31 overflows int
                    << (31 - rowIndex);
            }
            fprintf(file, "0x%08x", v);
            if (colIndex < m - 1) fprintf(file, ",");
        }
        fprintf(file, "\n");
    }
    // -------------------------------------------------------------------------
    // Print blocks as hexadecimal digits, q <= 4.
    // Each q x q block is packed into one number, with the block's bottom-right
    // bit most significant and its top-left bit least significant.
    // Trailing rows/columns that do not fill a whole block are not printed.
    // -------------------------------------------------------------------------
    void printBlocks(int q, FILE *file = stdout, const char *txt = "") const {
        if (txt) fprintf(file, "%s\n", txt);
        if (q <= 0) return;                                                     // Avoid dividing by zero below
        const int m = getm();
        for (int iBlock = 0; iBlock < m/q; iBlock++) {
            for (int jBlock = 0; jBlock < m/q; jBlock++) {
                unsigned v = 0;
                for (int i = q - 1; i >= 0; i--) {
                    for (int j = q - 1; j >= 0; j--) {
                        v = (v << 1) | getBit(iBlock * q + i, jBlock * q + j);
                    }
                }
                fprintf(file, "%x", v);
            }
            fprintf(file, "\n");
        }
        fprintf(file, "\n");
    }

protected:
    // Forward to the derived class (CRTP); note the result is narrowed to int
    int getBit(int i, int j) const {
        return static_cast<const Matrix*>(this)->getBit(i, j);
    }
    int getm() const {
        return static_cast<const Matrix*>(this)->getm();
    }

};















// =============================================================================
// Compact Row Matrix
// Whole matrix uses a single 64-bit word, making it compact for quick
// comparison operations.
// Bits are ordered left2right, top2bottom, in increasing order of significance
// =============================================================================

template <int m> class MatrixR64;
template <int m> using TupleR64 = std::vector<MatrixR64<m>>;                    // A list of compact matrices
template <int m> using TupleR64s = std::vector<TupleR64<m>>;                    // A list of such lists

template <int m>                                                                // The size is a template parameter so shifts and masks are compile-time constants
class MatrixR64 : public MatrixCommon<MatrixR64<m>> {
private:
    // All m x m bits packed in one word; element (i, j) is bit (i * m + j).
    // Kept volatile on purpose: the author reports it works around a bug that
    // showed up under compiler optimization.
    volatile uint64_t bits;
    // -------------------------------------------------------------------------
    // Functions for internal use
    // -------------------------------------------------------------------------
    inline void swapRows(int i, int j);                                         // Exchange rows i and j in place; used by inverse()
public:
    static constexpr int MASK = (1 << m) - 1;                                   // m low bits set: selects one row once it is shifted down to bit 0
    MatrixR64(uint64_t bits = 0) : bits(bits & msk(m*m)) {}                     // Build from packed content (masked with msk(m*m)); zero matrix by default
    int getm() const { return m; }                                              // Matrix dimension
    uint64_t getBits() const { return bits; }                                   // Packed content as a single word
    operator uint64_t() const { return bits; }                                  // Same as getBits(), via implicit conversion
    // -------------------------------------------------------------------------
    // Manipulation
    // -------------------------------------------------------------------------
    uint64_t getBlock(int rowTop, int colLeft, int size) const;                 // Packed size x size block whose top-left corner is (rowTop, colLeft)
    MatrixR64<m> putBlock(
        int rowTop, int colLeft, int size, uint64_t block) const;               // Copy of this with the designated size x size block replaced; this is not modified
    MatrixR64<m> inverse() const;                                               // Inverse matrix, or the zero matrix if singular
    // -------------------------------------------------------------------------
    // Arithmetic
    // -------------------------------------------------------------------------
    inline MatrixR64 operator+ (const MatrixR64<m> &other) const;               // Sum over GF(2), i.e. bitwise xor
    inline MatrixR64 operator& (const MatrixR64<m> &other) const;               // Element-wise and (masking)
    inline MatrixR64 operator* (const MatrixR64<m> &other) const;               // Matrix product over GF(2), other on the right
    // -------------------------------------------------------------------------
    // Querying
    // -------------------------------------------------------------------------
    uint64_t getBit(int i, int j) const {                                       // Element in row i, column j, both counted from 0
        const int position = i * m + j;
        return (bits >> position) & 1;
    }
    uint64_t getRow(int i) const {                                              // Row i as an m-bit word, column 0 in the least significant bit
        const int offset = i * m;
        return (bits >> offset) & MASK;
    }
    inline bool      operator==(const MatrixR64<m> &other) const;               // Equality test
    inline bool      operator!=(const MatrixR64<m> &other) const;               // Inequality test
    bool isInvertible() const;                                                  // Invertibility check by elimination, without building the inverse
    bool isAlpha() const;                                                       // See definition: invertible, and no power with exponent in [2, 2^m - 2] equals I
    // -------------------------------------------------------------------------
    // Static class Functions
    // -------------------------------------------------------------------------
    static MatrixR64<m> I();                                                    // Identity matrix
};


// -----------------------------------------------------------------------------
// Create special matrices
// -----------------------------------------------------------------------------

// Identity matrix: set the diagonal bit of every row.
template <int m>
MatrixR64<m> MatrixR64<m>::I() {
    constexpr int stride = m + 1;                                               // Moving one row down (m) and one column right (1)
    uint64_t diagonal = 0;
    int position = 0;
    for (int k = 0; k < m; k++, position += stride) {
        diagonal |= 1llu << position;
    }
    return MatrixR64<m>(diagonal);
}

// -----------------------------------------------------------------------------
// Simple matrix operations
// -----------------------------------------------------------------------------

// Addition over GF(2): element-wise xor of the two packed words.
template <int m>
MatrixR64<m> MatrixR64<m>::operator+(const MatrixR64 &other) const {
    const uint64_t mine = bits;
    const uint64_t theirs = other.bits;
    return MatrixR64(mine ^ theirs);
}

// Masking: element-wise and of the two packed words.
template <int m>
MatrixR64<m> MatrixR64<m>::operator&(const MatrixR64 &other) const {
    const uint64_t mine = bits;
    const uint64_t theirs = other.bits;
    return MatrixR64(mine & theirs);
}

// Equality test: two matrices are equal when their packed words coincide.
template <int m>
bool MatrixR64<m>::operator==(const MatrixR64 &other) const {
    const uint64_t mine = bits;
    const uint64_t theirs = other.bits;
    return mine == theirs;
}

// Inequality test: true when the packed words differ in at least one bit.
template <int m>
bool MatrixR64<m>::operator!=(const MatrixR64 &other) const {
    const uint64_t mine = bits;
    const uint64_t theirs = other.bits;
    return mine != theirs;
}

// -----------------------------------------------------------------------------
// Retrieve a block, typically to use as a sub-matrix
// -----------------------------------------------------------------------------

// Extract the size x size block with top-left corner (rowTop, colLeft), packed
// row by row with `size` bits per row. Rows falling below the matrix are left
// as zeros in the result.
template <int m>
uint64_t MatrixR64<m>::getBlock(int rowTop, int colLeft, int size) const {
    const int rowCount = std::min(size, m - rowTop);                            // Do not read past the last matrix row
    uint64_t packed = 0;
    for (int r = 0; r < rowCount; r++) {
        const uint64_t slice = (getRow(rowTop + r) >> colLeft) & msk(size);     // Bring the block's first column to bit 0 and keep `size` columns
        packed |= slice << (r * size);                                          // Store as row r of the block
    }
    return packed;
}

// -----------------------------------------------------------------------------
// Replace a designated block by a given uint64_t representation of a matrix.
// -----------------------------------------------------------------------------

// Return a copy of this matrix in which the size x size block at
// (rowTop, colLeft) is replaced by `block` (packed with `size` bits per row).
// Parts of the block falling outside the matrix are dropped.
template <int m>
MatrixR64<m> MatrixR64<m>::putBlock(
    int rowTop, int colLeft, int size, uint64_t block
) const {
    uint64_t updated = bits;                                                    // Work on a copy; this stays untouched
    const uint64_t delta = block ^ getBlock(rowTop, colLeft, size);             // Old xor new: xoring this in flips exactly the bits that differ
    const int rowCount = std::min(size, m - rowTop);                            // Skip block rows below the matrix
    for (int r = 0; r < rowCount; r++) {
        uint64_t patch = (delta >> (r * size)) & msk(size);                     // Row r of the difference block
        patch = (patch << colLeft) & MASK;                                      // Move to its column and cut whatever spills past column m-1
        updated ^= patch << ((rowTop + r) * m);                                 // Apply to the matching matrix row
    }
    return MatrixR64<m>{updated};
}

// -----------------------------------------------------------------------------
// Multiplication by another matrix on the right
// -----------------------------------------------------------------------------

// Product over GF(2) with `other` on the right: row r of the result is the xor
// of those rows of `other` that are selected by the set bits of row r of this.
template <int m>
MatrixR64<m> MatrixR64<m>::operator*(const MatrixR64 &other) const {
    MatrixR64 product(0);
    for (int r = 0; r < m; r++) {
        const int selector = getRow(r);                                         // Row vector of the left factor
        uint64_t combined = 0;
        for (int k = 0; k < m; k++) {
            const uint64_t coefficient = (selector >> k) & 1;                   // Element (r, k) of the left factor
            combined ^= coefficient * other.getRow(k);                          // Add row k of the right factor when the coefficient is 1
        }
        product.bits |= combined << (r * m);                                    // Store the computed row at row r of the result
    }
    return product;
}

// -----------------------------------------------------------------------------
// Check if matrix is invertible using Gaussian elimination.
// -----------------------------------------------------------------------------

// Invertibility test by forward elimination on a scratch copy: each row picks
// its lowest set bit as pivot and clears that column from all later rows; the
// matrix is singular as soon as any row becomes empty.
template <int m>
bool MatrixR64<m>::isInvertible() const {
    MatrixR64 work(bits);
    for (int r = 0; r < m; r++) {
        const uint64_t reference = work.getRow(r);
        if (reference == 0) return false;                                       // An empty row means the matrix is singular
        const int pivotCol = __builtin_ctz(reference);                          // Lowest set bit (count trailing zeros); that is the leftmost column in this encoding
        for (int below = r + 1; below < m; below++) {
            const uint64_t coefficient = work.getBit(below, pivotCol);
            work.bits ^= (coefficient * reference) << (below * m);              // Add the reference row to rows that have the pivot column set
            if (work.getRow(below) == 0) return false;                          // A row that cancels out completely was linearly dependent
        }
    }
    return true;
}

// -----------------------------------------------------------------------------
// Invert using Gaussian elimination.
// -----------------------------------------------------------------------------

// Swap rows i and j in place (helper for inverse()).
// Uses the xor trick: x = row_i ^ row_j, and xoring x into both rows turns
// row_i into row_j and vice versa. When i == j, x is zero and nothing changes.
template <int m>
void MatrixR64<m>::swapRows(int i, int j) {                                     // Auxiliary function to swap ith and jth rows
    volatile uint64_t x = getRow(i) ^ getRow(j);                                // Bits in which the two rows differ. Kept volatile on purpose: the author reports a subtle optimization-related bug here that volatile fixed
    bits ^= (x << (i * m)) | (x << (j * m));                                    // Flip the differing bits in both rows at once
};

// Inverse by Gauss-Jordan elimination over GF(2). Every row operation applied
// to a working copy of this matrix is mirrored on a matrix that starts as the
// identity; once the working copy is reduced to the identity, the mirror holds
// the inverse. Returns the zero matrix when this matrix is singular.
template <int m>
MatrixR64<m> MatrixR64<m>::inverse() const {
    MatrixR64 work(*this);                                                      // Scratch copy that gets reduced
    MatrixR64 result(MatrixR64<m>::I());                                        // Receives the same row operations
    // Forward pass: put a 1 on every diagonal entry and clear the column below
    for (int r = 0; r < m; r++) {
        const int diag = 1 << r;                                                // Mask of the diagonal bit within row r
        uint64_t reference = work.getRow(r);
        if ((reference & diag) == 0) {                                          // Diagonal entry missing: look for a later row that has it
            int donor = r + 1;
            while (donor < m && (work.getRow(donor) & diag) == 0) donor++;
            if (donor >= m) {
                return (MatrixR64<m>(0));                                       // No row can provide the pivot: singular, signal with an empty matrix
            }
            work.swapRows(r, donor);
            result.swapRows(r, donor);                                          // Mirror the swap
            reference = work.getRow(r);                                         // Refresh to the swapped-in row
        }
        for (int below = r + 1; below < m; below++) {
            if ((work.getRow(below) & diag) == 0) continue;                     // Pivot column already clear in this row
            work.bits ^= reference << (below * m);                              // Add the reference row to clear the pivot column
            result.bits ^= result.getRow(r) << (below * m);                     // Mirror the row addition
        }
    }
    // Backward pass: all diagonal bits are set now, clear the entries above
    for (int r = m - 1; r >= 0; r--) {
        const int diag = 1 << r;
        for (int above = r - 1; above >= 0; above--) {
            if ((work.getRow(above) & diag) == 0) continue;
            work.bits ^= work.getRow(r) << (above * m);                         // Add row r to clear column r in the row above
            result.bits ^= result.getRow(r) << (above * m);                     // Mirror the row addition
        }
    }
    return result;
}

// -----------------------------------------------------------------------------
// Check if matrix is (m-1)-root of I
// -----------------------------------------------------------------------------

// True when the matrix is invertible and none of its powers with exponent
// 2 .. 2^m - 2 equals the identity.
template <int m>
bool MatrixR64<m>::isAlpha() const {
    if (!isInvertible()) return false;
    const MatrixR64<m> identity(MatrixR64<m>::I());
    const int limit = (1 << m) - 1;                                             // Exponents are tested strictly below this value
    MatrixR64<m> accumulated(*this);                                            // Holds this^(exponent - 1) at the top of each iteration
    int exponent = 2;
    while (exponent < limit) {
        accumulated = (*this) * accumulated;
        if (accumulated == identity) return false;                              // Returned to I too early
        exponent++;
    }
    return true;
}


































// =============================================================================
// Row Matrix using arrays
// =============================================================================

class MatrixR;
using TupleR = std::vector<MatrixR>;                                            // A list of matrices
using TupleRs = std::vector<TupleR>;                                            // A list of such lists

// Square binary matrix of run-time size m (up to 64), stored one row per
// 64-bit word; element (i, j) is bit j of rows[i].
class MatrixR : public MatrixCommon<MatrixR> {
private:
    int m;                                                                      // Dimension
    volatile uint64_t rows[64];                                                 // Row words; storage is always 64 rows regardless of m
public:
    // -------------------------------------------------------------------------
    // Construction
    // -------------------------------------------------------------------------
    MatrixR(int m = 0): m(m) {                                                  // Zero matrix of the given size
        for (volatile uint64_t &row : rows) row = 0;
    }
    MatrixR(const std::vector<uint64_t> &rows);                                 // From a vector of row words; the size is the number of rows
    MatrixR(std::initializer_list<uint64_t> data);                              // From a braced list of row words
    MatrixR(const uint32_t *data, int m = 32, bool dataIsColumns = false);      // From an array of 32-bit rows, or columns when dataIsColumns
    MatrixR(const uint64_t *data, int m = 64, bool dataIsColumns = false);      // From an array of 64-bit rows, or columns when dataIsColumns
    template <int mR64>
    MatrixR(const MatrixR64<mR64> &matrixR64);                                  // Inflate from a compact row matrix
    static MatrixR I(int m);                                                    // Identity matrix
    static MatrixR J(int m);                                                    // Anti-diagonal matrix
    static MatrixR P(int m);                                                    // Pascal matrix
    static MatrixR L(int m);                                                    // Random lower-triangular matrix
    static MatrixR U(int m);                                                    // Random upper-triangular matrix
    static MatrixR createInvertible(int m);                                     // Random invertible matrix
    static MatrixR IBlock(int m, const MatrixR &block);                         // Copies of block along the diagonal
    static MatrixR curl(int m, int q);                                          // Curling matrix of random invertible q x q blocks along the diagonal
    static MatrixR repeatRow(int m, uint64_t row);                              // Same row repeated; useful for debugging
    static MatrixR companion(uint32_t polynomial);                              // Companion matrix of a given polynomial
    static MatrixR IMask(int m, uint64_t mask);                                 // Identity masked by a given bit pattern; bit (m-1) of mask goes to row 0
    // -------------------------------------------------------------------------
    // Arithmetic (none of these modify the operands)
    // -------------------------------------------------------------------------
    MatrixR operator^ (const MatrixR &other) const;                             // Xor-sum with other
    MatrixR operator+ (const MatrixR &other) const {                            // GF2 sum, the same as xor
        return operator^(other);
    }
    MatrixR operator- (const MatrixR &other) const {                            // GF2 difference, also the same as xor
        return operator^(other);
    }
    MatrixR operator& (const MatrixR &other) const;                             // And-masking with other
    MatrixR operator* (const MatrixR &other) const;                             // Product with other on the right
    MatrixR power(int exponent) const;                                          // This multiplied by itself exponent times
    // -------------------------------------------------------------------------
    // Manipulation
    // -------------------------------------------------------------------------
    MatrixR setm(int mNew) const;                                               // Crop or expand to mNew x mNew
    MatrixR shift(int i, int j) const;                                          // Shifted by i rows and j columns; this is left unchanged
    MatrixR getBlock(int rowTop, int colLeft, int size) const;                  // The size x size block starting at the designated row and column
    MatrixR putBlock(int rowTop, int colLeft, const MatrixR &block) const;      // New matrix with the designated block replaced; this is kept intact
    MatrixR hybrid(const MatrixR &other, int i) const;                          // Hybrid built by inserting leading rows of other from row i on
    MatrixR inverse();                                                          // Inverse matrix, or an empty matrix if singular
    // -------------------------------------------------------------------------
    // Querying
    // -------------------------------------------------------------------------
    uint64_t getRow(int i) const {                                              // Row i as a word, column 0 in the least significant bit
        const uint64_t word = rows[i];
        return word;
    }
    uint64_t getBit(int i, int j) const {                                       // Element in row i, column j, both counted from 0
        return (rows[i] >> j) & 1;
    }
    uint64_t getDiagonal(int i) const;                                          // ith diagonal; 0 is main, -/+ are left/right; row 0 is the most significant bit of the m-bit result
    int getm() const { return m; }                                              // Matrix dimension
    bool operator==(const MatrixR &other) const;
    bool operator!=(const MatrixR &other) const;
    bool operator<(const MatrixR &other) const;                                 // Needed for sorting alphabets,
    bool operator>(const MatrixR &other) const;                                 // we just use the first row for comparison so that 0 and I are the smallest
    bool isInvertible() const;                                                  // Check if this matrix is invertible
    bool isAlpha() const;                                                       // True if matrix is (m-1)-root of I.
    bool netMatches(const TupleR &others, int mBlock, FILE *log = NULL) const;  // Test if this can make a (0, m) net with others by testing that all block-hybrids are invertible; optionally log the constructed hybrid matrices
    static bool netMatch (const TupleR &tuple, int mBlock, FILE *log = NULL);   // Test if a tuple makes a (0, m) net
    static bool seqMatch (                                                      // Test if a tuple progressively makes a (0, m) net
        const TupleR &tuple,
        int mBlock,
        bool relative = false,                                                  // Factor out first matrix to unshuffle
        FILE *log = NULL
    );
    // -------------------------------------------------------------------------
    // Drawing samples via linear vector multiplication
    // -------------------------------------------------------------------------
    uint64_t operator[](uint64_t seqNo) const;                                  // seqNo is normally ordered, not bit reversed
    // -------------------------------------------------------------------------
    // Tuple operations
    // -------------------------------------------------------------------------
    static TupleR setm(const TupleR &tuple, int mNew);                          // Apply setm to a whole tuple
};

// -----------------------------------------------------------------------------
// Tuple operations
// -----------------------------------------------------------------------------

// Multiply every matrix of a tuple by Left on the left; the input tuple is not
// modified. Declared inline because this definition lives in a header: without
// it, including the header from two translation units yields duplicate symbols.
inline TupleR operator*(const MatrixR &Left, const TupleR &in) {
    TupleR out(in);
    for (MatrixR &element : out) {
        element = Left * element;
    }
    return out;
}

// Multiply every matrix of a tuple by Right on the right; the input tuple is
// not modified. Declared inline because this definition lives in a header:
// without it, including the header from two translation units yields duplicate
// symbols.
inline TupleR operator*(const TupleR &in, const MatrixR &Right) {
    TupleR out(in);
    for (MatrixR &element : out) {
        element = element * Right;
    }
    return out;
}

// -----------------------------------------------------------------------------
// Construct from a given vector of rows
// -----------------------------------------------------------------------------

// The matrix size is the number of supplied rows, capped at the 64 rows of
// storage; any rows beyond that are ignored instead of being written out of
// bounds. Inline because the definition lives in a header.
inline MatrixR::MatrixR(const std::vector<uint64_t> &data) {
    const size_t count = data.size() < 64 ? data.size() : 64;                   // Never write past rows[63]
    m = static_cast<int>(count);
    for (int i = 0; i < m; i++) {
        rows[i] = data[i] & msk(m);                                             // Clear higher order column bits for safety
    }
    for (int i = m; i < 64; i++) { rows[i] = 0; }                               // Clear remaining rows for safety
}

// -----------------------------------------------------------------------------
// Construct from a given list of rows
// -----------------------------------------------------------------------------

// The matrix size is the number of listed rows, capped at the 64 rows of
// storage; any rows beyond that are ignored instead of being written out of
// bounds. Inline because the definition lives in a header.
inline MatrixR::MatrixR(std::initializer_list<uint64_t> data) {
    const size_t count = data.size() < 64 ? data.size() : 64;                   // Never write past rows[63]
    m = static_cast<int>(count);
    std::copy(data.begin(), data.begin() + m, rows);
    for (int i = 0; i < m; i++) {
        rows[i] &= msk(m);                                                      // Clear higher order column bits for safety
    }
    for (int i = m; i < 64; i++) { rows[i] = 0; }                               // Clear remaining rows for safety
}

// -----------------------------------------------------------------------------
// Construct from array of rows or columns
// -----------------------------------------------------------------------------

// data must hold at least m words.
//  - dataIsColumns == false: data[i] is row i (column 0 in bit 0).
//  - dataIsColumns == true:  data[j] is column j, with row i stored in bit
//    (m-1 - i), i.e. row 0 in the most significant of the m bits.
// Inline because the definition lives in a header.
inline MatrixR::MatrixR(const uint32_t *data, int m, bool dataIsColumns): m(m) {
    for (int i = 0; i < 64; i++) { rows[i] = 0; }                               // Clear all for safety
    if (dataIsColumns) {
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < m; j++) {
                const uint64_t bit = (data[j] >> ((m-1) - i)) & 1;              // Element (i, j), widened so the shift below is done in 64 bits
                rows[i] |= bit << j;
            }
        }
    }
    else {
        std::copy(data, data + m, rows);
        for (int i = 0; i < m; i++) {
            rows[i] &= msk(m);                                                  // Clear higher order column bits for safety
        }
    }
}

// -----------------------------------------------------------------------------
// Construct from array of rows or columns
// -----------------------------------------------------------------------------

// data must hold at least m words.
//  - dataIsColumns == false: data[i] is row i (column 0 in bit 0).
//  - dataIsColumns == true:  data[j] is column j, with row i stored in bit
//    (m-1 - i), i.e. row 0 in the most significant of the m bits.
// Inline because the definition lives in a header.
inline MatrixR::MatrixR(const uint64_t *data, int m, bool dataIsColumns): m(m) {
    for (int i = 0; i < 64; i++) { rows[i] = 0; }                               // Clear all for safety
    if (dataIsColumns) {
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < m; j++) {
                const uint64_t bit = (data[j] >> ((m-1) - i)) & 1;              // Element (i, j)
                rows[i] |= bit << j;
            }
        }
    }
    else {
        std::copy(data, data + m, rows);
        for (int i = 0; i < m; i++) {
            rows[i] &= msk(m);                                                  // Clear higher order column bits for safety
        }
    }
}

// -----------------------------------------------------------------------------
// Construct from a compact row matrix
// -----------------------------------------------------------------------------

// Inflate a compact matrix: copy its rows one by one and zero the unused rows.
template <int mR64>                                                             // Named differently from the member m to avoid a clash
MatrixR::MatrixR(const MatrixR64<mR64> &matrixR64): m(mR64) {
    int i = 0;
    while (i < m) {
        rows[i] = matrixR64.getRow(i);                                          // getRow already returns just the row's own bits
        i++;
    }
    i = m;
    while (i < 64) {
        rows[i] = 0;                                                            // Clear remaining rows for safety
        i++;
    }
}

// -----------------------------------------------------------------------------
// Create identity matrix
// -----------------------------------------------------------------------------

// Identity matrix: row i has only bit i set.
// Inline because the definition lives in a header.
inline MatrixR MatrixR::I(int m) {
    MatrixR result(m);
    for (int i = 0; i < m; i++) {
        result.rows[i] = 1llu << i;
    }
    return result;
}

// -----------------------------------------------------------------------------
// Create anti-diagonal matrix
// -----------------------------------------------------------------------------

// Anti-diagonal matrix: row i has only bit (m-1 - i) set.
// Inline because the definition lives in a header.
inline MatrixR MatrixR::J(int m) {
    MatrixR result(m);
    for (int i = 0; i < m; i++) {
        result.rows[i] = 1llu << (m-1 - i);
    }
    return result;
}


// -----------------------------------------------------------------------------
// Create Pascal matrix
// -----------------------------------------------------------------------------

// Pascal matrix over GF(2), built column by column: column 0 is 1, and each
// next column is the previous one xored with itself shifted by one row, which
// is the Pascal-triangle recurrence modulo 2. Only entries on or above the
// diagonal are written. Inline because the definition lives in a header.
inline MatrixR MatrixR::P(int m) {
    MatrixR result(m);
    uint64_t col(1);                                                            // Current column, row 0 in bit 0
    for (int j = 0; j < m; j++) {
        for (int i = 0; i <= j; i++) {
            result.rows[i] |= ((col >> i) & 1) << j;                            // Insert ith bit of jth column in the jth place of ith row
        }
        col ^= col << 1;                                                        // Advance to the next column
    }
    return result;
}

// -----------------------------------------------------------------------------
// Create a random lower-triangular matrix
// -----------------------------------------------------------------------------

// Random lower-triangular matrix with a unit diagonal: row i has bit i set and
// random bits in the columns before it (rnd() and msk() are not defined in this
// file; presumably they come from common.h).
// Inline because the definition lives in a header.
inline MatrixR MatrixR::L(int m) {
    MatrixR result(m);
    result.rows[0] = 1;                                                         // First row has only its diagonal entry
    for (int i = 1; i < m; i++) {
        result.rows[i] = (1llu << i) | (rnd() & msk(i));                        // Diagonal bit plus random bits in columns 0 .. i-1
    }
    return result;
}

// -----------------------------------------------------------------------------
// Create a random upper-triangular matrix
// -----------------------------------------------------------------------------

// Random upper-triangular matrix with a unit diagonal: row i has bit i set,
// zeros in the columns before it and random bits in the columns after it
// (rnd() and msk() are not defined in this file; presumably they come from
// common.h). Inline because the definition lives in a header.
inline MatrixR MatrixR::U(int m) {
    MatrixR result(m);
    for (int i = 0; i < m; i++) {
        result.rows[i] = ((rnd() | 1) << i) & msk(m);                           // Forced 1 lands on the diagonal; the shift clears columns below i; the mask trims columns past m-1
    }
    return result;
}

// -----------------------------------------------------------------------------
// Create a random invertible m x m matrix.
// Roughly 29% of random binary matrices are invertible, so we just use random
// sampling, one row at a time: a candidate row is reduced against the rows
// accepted so far and kept only if something remains, i.e. if it is linearly
// independent of them.
//   m: matrix size; the scratch buffer below holds at most 64 rows
// Returns the matrix made of the accepted (unreduced) candidate rows.
// -----------------------------------------------------------------------------

MatrixR MatrixR::createInvertible(int m) {
    MatrixR result(m);
    uint64_t reduced[64];                                                       // For a trial Gaussian elimination
    for (int i = 0; i < m; i++) {
        for (bool pass = false; !pass; ) {
            uint64_t candidate(rnd() & msk(m));                                 // Generate a random row and copy it for Gaussian elimination trial
            reduced[i] = candidate;
            for (int j = 0; (j < i) && reduced[i]; j++) {                       // Iterate through preceding rows
                // Use least significant bit (semantically leftmost column) as a pivot.
                // The rows are 64-bit words, so the 64-bit builtin is required:
                // __builtin_ctz only looks at the low 32 bits and is undefined
                // when those are all zero, which happens once m exceeds 32.
                if ( reduced[i] & (1llu << __builtin_ctzll(reduced[j])) ) {
                    reduced[i] ^= reduced[j];
                }
            }
            if (reduced[i]) {                                                   // Something remains after elimination?
                pass = true;
                result.rows[i] = candidate;
            }
        }
    }
    return result;
}


// -----------------------------------------------------------------------------
// Create diagonal block matrix:
// start from the m x m identity and stamp copies of block along the diagonal.
// Only full blocks are placed; trailing rows that cannot hold a whole block
// keep their identity entries.
//   m:     size of the result
//   block: matrix to repeat; an empty block (size 0 or less) leaves the
//          identity untouched
// -----------------------------------------------------------------------------

MatrixR MatrixR::IBlock(int m, const MatrixR &block) {
    MatrixR result = I(m);
    if (block.m <= 0) {                                                         // A zero step would never advance the loop below
        return result;
    }
    for (int i = 0; i < m - (block.m - 1); i += block.m) {                      // Subtracting (block.m - 1) is to ensure only full blocks
        result = result.putBlock(i, i, block);
    }
    return result;
}

// -----------------------------------------------------------------------------
// Create a curling matrix of random invertible qxq blocks along diagonal.
// Starts from the m x m identity; each full q x q diagonal block is replaced
// by a freshly drawn invertible matrix. Trailing rows that cannot hold a whole
// block keep their identity entries.
//   m: size of the result
//   q: block size; for q <= 0 no block can be placed and the identity is
//      returned
// -----------------------------------------------------------------------------

MatrixR MatrixR::curl(int m, int q) {
    MatrixR result = I(m);
    if (q <= 0) {                                                               // Zero step never terminates; negative step walks out of range
        return result;
    }
    for (int i = 0; i < m - (q - 1); i += q) {                                  // Subtracting (q - 1) is to ensure only full blocks
        result = result.putBlock(i, i, createInvertible(q));
    }
    return result;
}


// -----------------------------------------------------------------------------
// Build a matrix whose m rows all equal the given row pattern, after the
// pattern has been passed through msk(m); useful for debugging
// -----------------------------------------------------------------------------

MatrixR MatrixR::repeatRow(int m, uint64_t row) {
    const uint64_t pattern = row & msk(m);                                      // Sanitize once, up front
    MatrixR repeated(m);
    int next = 0;
    while (next < m) {
        repeated.rows[next++] = pattern;
    }
    return repeated;
}

// -----------------------------------------------------------------------------
// Companion matrix of a polynomial.
//   p: bit pattern of the polynomial; the index of its highest set bit is
//      taken as the degree m
// Returns MatrixR() when p is 0. Otherwise returns an m x m matrix made of the
// identity shifted up by one row, with the last row set to p & msk(m)
// (presumably the coefficient bits below the leading one).
// A constant polynomial (p == 1) has degree 0: there is no last row to fill,
// so the empty result is returned as is instead of writing to row -1.
// -----------------------------------------------------------------------------

MatrixR MatrixR::companion(uint32_t p) {
    if (!p) { return MatrixR(); }
    int m = 31 - __builtin_clz(p);                                              // Degree: position of the leading coefficient
    MatrixR result = I(m).shift(-1, 0);                                         // Row i now carries the single bit of old row i+1
    if (m > 0) {                                                                // Guard against the degree-0 case
        result.rows[m - 1] = p & msk(m);
    }
    return result;
}

// -----------------------------------------------------------------------------
// Create identity matrix masked by a bit pattern:
// the diagonal entry of row r is kept only if bit (m-1 - r) of mask is set,
// so bit (m-1) of mask controls row 0 and bit 0 controls the last row
// -----------------------------------------------------------------------------

MatrixR MatrixR::IMask(int m, uint64_t mask) {
    MatrixR masked(m);                                                          // empty matrix
    for (int row = 0; row < m; row++) {
        const bool keep = (mask >> (m-1 - row)) & 1;                            // Mask is read from its top bit downward
        masked.rows[row] = keep ? (1llu << row) : 0;
    }
    return masked;
}

// -----------------------------------------------------------------------------
// Extract a 'size x size' block matrix starting at designated row and column.
// Rows of the block that would fall below the last row of this matrix are not
// copied and keep whatever the freshly constructed result holds.
// -----------------------------------------------------------------------------

MatrixR MatrixR::getBlock(int rowTop, int colLeft, int size) const {
    MatrixR block(size);
    const int rowsBelow = m - rowTop;                                           // Rows available from rowTop down
    const int copied = std::min(size, rowsBelow);
    for (int r = 0; r < copied; r++) {
        const auto aligned = rows[rowTop + r] >> colLeft;                       // Bring the block's first column to bit 0
        block.rows[r] = aligned & msk(size);
    }
    return block;
}

// -----------------------------------------------------------------------------
// Return a copy of this matrix in which the designated block is replaced by
// the given matrix; this matrix itself is left unchanged.
// Rows of the block that would fall below the last row are dropped.
// -----------------------------------------------------------------------------

MatrixR MatrixR::putBlock(
    int rowTop, int colLeft, const MatrixR &block
) const {
    // Sum of new and current contents: xoring it in cancels the old bits and
    // leaves the new ones.
    MatrixR delta = block + getBlock(rowTop, colLeft, block.m);
    MatrixR patched(*this);
    const int copied = std::min(block.m, m - rowTop);
    for (int r = 0; r < copied; r++) {
        const auto placed = (delta.rows[r] << colLeft) & msk(m);                // Move to target columns, then apply msk(m)
        patched.rows[rowTop + r] ^= placed;
    }
    return patched;
}

// -----------------------------------------------------------------------------
// Construct a hybrid: rows above i come from this matrix, rows i .. m-1 are
// overwritten by rows 0, 1, 2, .. of other.
// -----------------------------------------------------------------------------

MatrixR MatrixR::hybrid(const MatrixR &other, int i) const {
    MatrixR mixed(*this);                                                       // Start from a copy of this
    int source = 0;                                                             // Read position in other
    while (i < m) {
        mixed.rows[i++] = other.rows[source++];
    }
    return mixed;
}

// -----------------------------------------------------------------------------
// Invert using Gaussian elimination.
// Every row operation applied to a working copy of this matrix is mirrored on
// a matrix that starts out as identity; once the working copy is reduced to
// identity, the mirror holds the inverse. This matrix itself is not modified.
// Returns a size-0 matrix if this matrix is singular.
// -----------------------------------------------------------------------------

MatrixR MatrixR::inverse() {
    MatrixR work(*this);
    MatrixR mirror(MatrixR::I(m));

    // Forward pass: secure a pivot on each diagonal position and clear the
    // column below it.
    for (int col = 0; col < m; col++) {
        const uint64_t diagonal = 1llu << col;
        int source = col;                                                       // First row, from the current one down, that has the diagonal bit
        while (source < m && (work.rows[source] & diagonal) == 0) {
            source++;
        }
        if (source == m) {                                                      // No row can supply the pivot: singular
            return (MatrixR(0));
        }
        if (source != col) {                                                    // Bring the supplying row into pivot position
            std::swap(work.rows[col], work.rows[source]);
            std::swap(mirror.rows[col], mirror.rows[source]);
        }
        for (int below = col + 1; below < m; below++) {
            if (work.rows[below] & diagonal) {
                work.rows[below] ^= work.rows[col];
                mirror.rows[below] ^= mirror.rows[col];
            }
        }
    }

    // Backward pass: all diagonal bits are set by now; clear the column above
    // each pivot, going from the last pivot to the first.
    for (int col = m - 1; col >= 0; col--) {
        const uint64_t diagonal = 1llu << col;
        for (int above = 0; above < col; above++) {
            if (work.rows[above] & diagonal) {
                work.rows[above] ^= work.rows[col];
                mirror.rows[above] ^= mirror.rows[col];
            }
        }
    }
    return mirror;
}

// -----------------------------------------------------------------------------
// Shift by upDown rows and leftRight columns and return result, leaving this
// unchanged. Positive upDown moves rows to higher indices, negative to lower
// indices. Positive leftRight moves bits to higher positions (then applies
// msk(m)), negative to lower positions. Vacated rows are left as constructed.
// -----------------------------------------------------------------------------

MatrixR MatrixR::shift(int upDown, int leftRight) const {
    MatrixR shifted(m);
    for (int dst = 0; dst < m; dst++) {
        const int src = dst - upDown;                                           // Row of this that lands on row dst
        if (src >= 0 && src < m) {
            shifted.rows[dst] = rows[src];
        }
        if (leftRight < 0) {
            shifted.rows[dst] >>= -leftRight;
        }
        else {
            shifted.rows[dst] = (shifted.rows[dst] << leftRight) & msk(m);
        }
    }
    return shifted;
}

// -----------------------------------------------------------------------------
// Crop or expand to mNew x mNew.
// Returns a new mNew x mNew matrix holding the top-left part of this matrix,
// each copied row passed through msk(mNew). When expanding, only the m rows
// that actually exist are copied; the added rows stay as constructed
// (presumably zero) instead of being read from beyond this matrix.
// -----------------------------------------------------------------------------

MatrixR MatrixR::setm(int mNew) const {
    MatrixR out(mNew);
    const int copied = (mNew < m) ? mNew : m;                                   // Never read past the last row of this
    for (int i = 0; i < copied; i++) {
        out.rows[i] = rows[i] & msk(mNew);
    }
    return out;
}

TupleR MatrixR::setm(const TupleR &in, int m) {
    // Tuple version: returns a copy of the tuple with every matrix cropped or
    // expanded to m x m.
    TupleR resized(in);
    int index = 0;
    while (index < resized.size()) {
        resized[index] = resized[index].setm(m);
        index++;
    }
    return resized;
}

// -----------------------------------------------------------------------------
// Multiply by a matrix on the right.
// Each row of the product is the xor of those rows of other that are selected
// by the set bits in the corresponding row of this matrix.
// -----------------------------------------------------------------------------

MatrixR MatrixR::operator*(const MatrixR &other) const {
    MatrixR product(m);                                                         // Create an empty matrix
    for (int r = 0; r < m; r++) {
        for (int k = 0; k < m; k++) {
            if ((rows[r] >> k) & 1) {                                           // Entry (r, k) of this is set ..
                product.rows[r] ^= other.rows[k];                               // .. so row k of other contributes
            }
        }
    }
    return product;
}

// -----------------------------------------------------------------------------
// Sum with another matrix (row-wise xor)
// -----------------------------------------------------------------------------

MatrixR MatrixR::operator^(const MatrixR &other) const {
    MatrixR sum(*this);                                                         // Start from a copy of this
    int row = 0;
    while (row < m) {
        sum.rows[row] ^= other.rows[row];
        row++;
    }
    return sum;
}

// -----------------------------------------------------------------------------
// Mask by another matrix (row-wise and)
// -----------------------------------------------------------------------------

MatrixR MatrixR::operator&(const MatrixR &other) const {
    MatrixR masked(*this);                                                      // Start from a copy of this
    int row = 0;
    while (row < m) {
        masked.rows[row] &= other.rows[row];
        row++;
    }
    return masked;
}


// -----------------------------------------------------------------------------
// Return this multiplied by itself exponent times.
// Uses square-and-multiply, so the number of matrix products grows with the
// number of bits in exponent rather than with exponent itself.
//   exponent: number of factors; for 0 or a negative value the identity is
//             returned
// -----------------------------------------------------------------------------

MatrixR MatrixR::power(int exponent) const {
    MatrixR result(I(m));
    MatrixR square(*this);                                                      // this^(2^k) for the bit currently examined
    for (int remaining = exponent; remaining > 0; remaining >>= 1) {
        if (remaining & 1) {                                                    // Current binary digit of exponent is set
            result = result * square;
        }
        if (remaining > 1) {                                                    // More digits to come: prepare the next square
            square = square * square;
        }
    }
    return result;
}

// -----------------------------------------------------------------------------
// Retrieve ith diagonal; 0 is main, -/+ are left/right.
// The entry taken from row r is stored at bit (m-1 - r) of the result,
// so row 0 is the most significant bit in the m-bit representation
// -----------------------------------------------------------------------------

uint64_t MatrixR::getDiagonal(int i) const {
    const int firstCol = (i >= 0) ? i : 0;                                      // Right diagonals start at column i
    const int endCol = (i >= 0) ? m : m + i;                                    // Left diagonals end early; i is negative there
    uint64_t packed(0);
    for (int col = firstCol; col < endCol; col++) {
        const int row = col - i;
        packed |= getBit(row, col) << (m-1 - row);
    }
    return packed;
}

// -----------------------------------------------------------------------------
// Equality and inequality test.
// Matrices are equal when they have the same size and every pair of rows
// agrees on the bits selected by msk(m).
// -----------------------------------------------------------------------------

bool MatrixR::operator==(const MatrixR &other) const {
    if (m != other.m) { return false; }
    int row = 0;
    while (row < m && ((rows[row] ^ other.rows[row]) & msk(m)) == 0) {          // Advance while rows agree
        row++;
    }
    return row >= m;                                                            // Reached the end without a mismatch?
}

bool MatrixR::operator!=(const MatrixR &other) const {
    const bool same = (*this == other);                                         // Defined as the negation of ==
    return !same;
}

// -----------------------------------------------------------------------------
// Greater and smaller than for sorting alphabets.
// Only the first rows are compared; the remaining rows are ignored.
// -----------------------------------------------------------------------------

bool MatrixR::operator>(const MatrixR &other) const {
    return other.rows[0] < rows[0];
}

bool MatrixR::operator<(const MatrixR &other) const {
    return other.rows[0] > rows[0];                                             // First rows only, as in operator>
}

// -----------------------------------------------------------------------------
// Check if matrix is invertible without completing the inversion process.
// Works on a copy: each row in turn serves as reference, its lowest set bit is
// the pivot column, and that column is eliminated from all rows below.
// Returns false as soon as a row is zeroed, true if none is.
// -----------------------------------------------------------------------------

bool MatrixR::isInvertible() const {
    if (!rows[0]) return false;                                                 // An all-zero first row has no pivot
    MatrixR tmp(*this);
    for (int iref = 0; iref < m-1; iref++) {
        // Index of pivot column. The 64-bit builtin is needed here:
        // __builtin_ctz only inspects the low 32 bits of the row and is
        // undefined when those are all zero.
        int j = __builtin_ctzll(tmp.rows[iref]);
        for (int i = iref + 1; i < m; i++) {
            tmp.rows[i] ^= tmp.getBit(i, j) * tmp.rows[iref];                   // Eliminate column of reference row from ith row
            if (!tmp.rows[i]) return false;                                     // If row is zeroed then the matrix is singular
        }
    }
    return true;
}

// -----------------------------------------------------------------------------
// Check if matrix is (m-1)-root of I
// -----------------------------------------------------------------------------

bool MatrixR::isAlpha() const {
    if (!isInvertible()) return false;
    MatrixR power(*this), I(MatrixR::I(m));
    for (int exponent = 2; exponent < (1 << m) - 1; exponent++) {
        power = (*this) * power;
        //power.printf(stdout, "\n");
        if (power == I) return false;
    }
    return true;
}

// -----------------------------------------------------------------------------
// Test if this can make a (0, m) net with others by testing if all
// block-hybrids are invertible.
// A hybrid keeps the leading i block-rows of this matrix and fills the rows
// after them with leading block-rows of the other matrices, as many from each
// as the counter dictates.
//   others: matrices to test against
//   mBlock: number of rows per block-row
//   log:    if not null, this matrix and every hybrid tested are printed to it
// Returns false as soon as this matrix or any hybrid is found non-invertible,
// true otherwise.
// -----------------------------------------------------------------------------

bool MatrixR::netMatches(const TupleR &others, int mBlock, FILE *log) const {
    const int nBlockRows = m / mBlock;                                          // Whole block-rows in this matrix
    int n = others.size();                                                      // Number of proposed tupled matrices
    std::vector<char> title(n + 2, '0');                                        // 1 digit for this + n digits for others + null terminating character
    title.back() = 0;                                                           // NULL terminating char
    if (log) {
        title[0] = nBlockRows + '0';                                            // Yields a decimal digit only for values 0 .. 9
        printf(log, title.data()); fprintf(log, "\n");
    }
    if (!isInvertible()) return false;                                          // Matrix itself must be invertible
    if (!n) return true;                                                        // If no others then we are done
    for (int i = nBlockRows - 1; i > 0; i--) {                                  // Iterate through number of block-rows taken from this
        // NestedCounter is defined elsewhere; presumably it enumerates the
        // ways of sharing (nBlockRows - i) block-rows among the n matrices.
        for (NestedCounter counter{nBlockRows - i, n-1}; counter; counter++) {
            MatrixR hybrid(*this);                                              // Clone this in hybrid matrix
            int hybridRow = mBlock * i;                                         // Next insertion row: ith block row
            title[0] = i + '0';
            std::vector<int> counts = counter.getCounts();
            for (int k = 0; k < n; k++) {
                for (
                    int srcRow = 0;                                             // Source row index, starting at top
                    srcRow < counts[k] * mBlock;                                    // All through counts[k] block-rows of mBlock rows
                    srcRow++, hybridRow++                                       // Advance source read and hybrid write pointers
                ) {
                    hybrid.rows[hybridRow] = others[k].rows[srcRow];            // Copy the row to hybrid
                }
                title[k + 1] = counts[k] + '0';                                 // Record how many block-rows matrix k contributed
            }
            if (log) {
                hybrid.printf(log, title.data()); fprintf(log, "\n");
            }
            if (!hybrid.isInvertible()) return false;                                    // Return false immediately if a hybrid is non-invertible, whether or not logging is requested
        }
    }
    return true;
}

// -----------------------------------------------------------------------------
// Test if a tuple makes a (0, m) net
// -----------------------------------------------------------------------------

bool MatrixR::netMatch(const TupleR &tuple, int mBlock, FILE *log) {
    TupleR subset;
    for (int i = 0; i < tuple.size(); i++) {                                    // Iterate through items not yet added, so start with 1
        if (log) {
            fprintf(log, "\nAdding matrix %d:\n=======================\n", i);
        }
        if ( !tuple[i].netMatches(subset, mBlock, log) ) { return false; }      // Each needs to match all preceding ones
        subset.push_back(tuple[i]);
    }
    return true;
}

// -----------------------------------------------------------------------------
// Progressively test for netmatching
// -----------------------------------------------------------------------------

bool MatrixR::seqMatch(
    const TupleR &tuple, int mBlock, bool relative, FILE *log
) {
    TupleR t = tuple;
    if (relative) { t = t * t[0].inverse(); }
    for (int i = 1; i <= t[0].m / mBlock; i++) {
        TupleR leading = setm(t, i * mBlock);
        if (log) {
            printf(leading, log, "leading block:");
        }
        if (!MatrixR::netMatch(leading, mBlock, log)) {
            return false;
        }
    }
    return true;
}

// -----------------------------------------------------------------------------
// Multiply by a sequence number in normal (not reversed) bit ordering.
// Each result bit is the parity of the bits shared by one row and seqNo;
// row 0 ends up as the most significant of the m result bits.
// -----------------------------------------------------------------------------

uint64_t MatrixR::operator[](uint64_t seqNo) const {
    uint64_t packed(0);
    for (int row = 0; row < m; row++) {
        const uint64_t parity = _popcnt64(rows[row] & seqNo) & 1;               // Row times vector, modulo 2
        packed <<= 1;                                                           // Make room: earlier rows move up
        packed |= parity;
    }
    return packed;
}


















#endif                                                                          // ifndef MATRIX_H
