/*
 * Implementation of SZ-based samplers
 * 2025-08-25: Created by Abdalla Ahmed from earlier versions.
 *
 */

#ifndef PBRT_SZ_SAMPLERS_H
#define PBRT_SZ_SAMPLERS_H

#include <pbrt/base/sampler.h>
#include <pbrt/util/rng.h>
#include <pbrt/util/math.h>

#include <algorithm>
#include <cstdint>
#include <string>

//#include <x86intrin.h>                                                          // needed for fast Morton encoding/decoding using _pdep_u32 and _pext_u32

namespace pbrt {
;

// =============================================================================
// QARTOwen class for shuffling z indices
// =============================================================================

// Scrambles a 32-bit index one base-4 digit at a time (most significant digit
// first). Each of 256 node ids carries a packed 2x2 binary matrix and a 32-bit
// XOR code; the id of the next node is derived from the current id and the
// digit just consumed.
class QARTOwen {
private:
    unsigned char matrices[256];                                                // Per-id packed 2x2 matrix (4 bits used)
    uint32_t xorCodes[256];                                                     // Per-id XOR code
public:
    // -------------------------------------------------------------------------
    // Constructor: fills both tables from a default-constructed RNG.
    // For every id the matrix is drawn first, then the XOR code.
    // -------------------------------------------------------------------------
    PBRT_CPU_GPU
    inline QARTOwen() {
        RNG rng;
        const int invertible[6] = {9, 7, 14, 6, 13, 11};                        // Invertible 2x2 matrices as column vectors
        for (int node = 0; node < 256; ++node) {
            const uint32_t pick = rng.Uniform<uint32_t>() % 6;
            matrices[node] = invertible[pick];
            xorCodes[node] = rng.Uniform<uint32_t>();
        }
    }
    // -------------------------------------------------------------------------
    // Production rules
    // -------------------------------------------------------------------------
    // 1D rule: each 64-bit word packs sixteen 4-bit child ids, one per parent
    // id; childIndex (0 or 1) selects the word.
    PBRT_CPU_GPU
    inline int getChildId1D(int parentID, int childIndex) const {
        const uint64_t productions[2] = {
            0x9854541098985410llu, 0xcdefef23cdab6723llu
        };
        const uint64_t rule = productions[childIndex];
        return (rule >> (4 * parentID)) & 15;
    }
    // 2D rule: the high nibble of the id follows the 1D rule with the high bit
    // of childIndex (Y axis), the low nibble with the low bit (X axis).
    PBRT_CPU_GPU
    inline int getChildId(int parentID, int childIndex) const {
        const int yChild = getChildId1D(parentID >> 4, childIndex >> 1);
        const int xChild = getChildId1D(parentID & 15, childIndex & 1);
        return (yChild << 4) | xChild;
    }
    // -------------------------------------------------------------------------
    // 2-bit matrix-vector product over GF(2).
    // M holds two 2-bit columns: the low one is selected by bit 0 of V, the
    // high one by V >> 1. Bit ordering is not significant here.
    // -------------------------------------------------------------------------
    PBRT_CPU_GPU
    inline int MxV(int M, int V) const {
        const int lowColumn = (V & 1) * (M & 3);
        const int highColumn = (V >> 1) * (M >> 2);
        return lowColumn ^ highColumn;
    }
    // -------------------------------------------------------------------------
    // Scramble x. Walks the 16 base-4 digits from the top, mapping each digit
    // through the matrix of the current node and accumulating that node's XOR
    // code shifted right by the number of bits already consumed.
    // TODO: This can be optimized further by combining MxV multiplications
    // -------------------------------------------------------------------------
    PBRT_CPU_GPU
    inline uint32_t operator()(uint32_t x) const {
        uint32_t scrambled = 0;
        uint32_t mask = 0;
        int node = 0;
        for (int consumed = 0; consumed <= 30; consumed += 2) {
            mask ^= xorCodes[node] >> consumed;
            const int digit = (x >> (30 - consumed)) & 3;                       // Next base-4 digit, MSB first
            scrambled = (scrambled << 2) | MxV(matrices[node], digit);
            node = getChildId(node, digit);
        }
        return scrambled ^ mask;
    }
};

// =============================================================================
// The original hashing of Ahmed and Wonka's ZSampler
// =============================================================================

// All 4! = 24 permutations of {0,1,2,3}, each packed into one byte:
// bits [2k+1 : 2k] hold the image of digit k.
#define P(a, b, c, d) ((a) + ((b) << 2) + ((c) << 4) + ((d) << 6))          /* Pack 4! permutations */
PBRT_CONST unsigned char PERMUTATIONS[24] = {
    P(0,1,2,3), P(0,1,3,2), P(0,2,1,3), P(0,2,3,1), P(0,3,1,2), P(0,3,2,1),
    P(1,0,2,3), P(1,0,3,2), P(1,2,0,3), P(1,2,3,0), P(1,3,0,2), P(1,3,2,0),
    P(2,0,1,3), P(2,0,3,1), P(2,1,0,3), P(2,1,3,0), P(2,3,0,1), P(2,3,1,0),
    P(3,0,1,2), P(3,0,2,1), P(3,1,0,2), P(3,1,2,0), P(3,2,0,1), P(3,2,1,0)
};
#undef P                                                                        // The packing macro is only needed for the table

// Digit-wise base-4 scrambler: every base-4 digit of the input is replaced
// through a permutation selected by hashing the digits that precede it.
class ZHash {
public:
    // -------------------------------------------------------------------------
    // Map a branch (the digits consumed so far) to a permutation index in
    // [0, 24) using 24-bit fixed-point multiplicative hashing.
    // -------------------------------------------------------------------------
    PBRT_CPU_GPU
    inline uint32_t hash(uint32_t branch) const {
        constexpr int BITS = 24;
        constexpr uint32_t MASK = (1 << BITS) - 1;
        constexpr uint32_t Z = 0x9E377A;                                        // Z / (1 << BITS) ~ 0.618, fractional part of the golden ratio
        uint32_t x = (branch * Z) & MASK;                                       // Fractional part
        return (x * 24) >> BITS;                                                // Map to the desired range; x < 2^24 so x * 24 cannot overflow
    }
    // -------------------------------------------------------------------------
    // Scramble x, processing its 16 base-4 digits from the most significant.
    // -------------------------------------------------------------------------
    PBRT_CPU_GPU
    inline uint32_t operator()(uint32_t x) const {
        uint32_t xOut = 0;
        uint32_t branch = 0;                                                    // Unsigned: it is shifted up to 32 bits over the loop
        for (int pos = 30; pos >= 0; pos -= 2) {
            const uint32_t digit = (x >> pos) & 3;                              // Extract the next base-4 digit
            const uint32_t permutation = PERMUTATIONS[hash(branch)];
            const uint32_t digitOut = (permutation >> (2 * digit)) & 3;         // Apply it
            xOut = (xOut << 2) | digitOut;
            branch = (branch << 2) | digit;                                     // Seems like the branch is digit-reversed??
        }
        return xOut;
    }
};

// No-op scrambler: returns its argument unchanged. Offers the same call
// operator as QARTOwen and ZHash so it can stand in for them.
class Base4OwenIdentity {
public:
    PBRT_CPU_GPU
    inline uint32_t operator()(uint32_t x) const {                              // const, matching QARTOwen::operator()
        return x;
    }
};



// =============================================================================
// Functions for evaluating SZ samples
// =============================================================================

// -----------------------------------------------------------------------------
// 2D Sobol fast generation -- 64bit
// -----------------------------------------------------------------------------

PBRT_CPU_GPU
inline uint64_t J64(uint64_t x) {
    // Delegates to ReverseBits64. A hand-rolled swap network is kept below
    // for reference only:
    /*
    x = ((x & 0x5555555555555555llu) << 1)|((x >> 1) & 0x5555555555555555llu);
    x = ((x & 0x3333333333333333llu) << 2)|((x >> 2) & 0x3333333333333333llu);
    x = ((x & 0x0f0f0f0f0f0f0f0fllu) << 4)|((x >> 4) & 0x0f0f0f0f0f0f0f0fllu);
    x = ((x & 0x00ff00ff00ff00ffllu) << 8)|((x >> 8) & 0x00ff00ff00ff00ffllu);
    x = ((x & 0x0000ffff0000ffffllu) <<16)|((x >>16) & 0x0000ffff0000ffffllu);
    x = (x << 32) | (x >> 32);                                                  // final 32-bit swap, no mask needed
    return x;
    */
    const uint64_t mirrored = ReverseBits64(x);
    return mirrored;
}

PBRT_CPU_GPU
inline uint64_t PJ64(uint64_t i) {
    // Six butterfly stages with halving block width (32, 16, 8, 4, 2, 1).
    // In each stage the upper half of every block is kept, XORed with a copy
    // of itself moved into the lower half, and XORed with the lower half moved
    // into the upper half.
    // NOTE(review): by its name and the comments on PJ4/PJ8 this is presumably
    // the Pascal matrix combined with bit reversal -- confirm.
    const uint64_t upperHalves[6] = {
        0xFFFFFFFF00000000, 0xFFFF0000FFFF0000, 0xFF00FF00FF00FF00,
        0xF0F0F0F0F0F0F0F0, 0xCCCCCCCCCCCCCCCC, 0xAAAAAAAAAAAAAAAA
    };
    int width = 32;
    for (int stage = 0; stage < 6; ++stage, width >>= 1) {
        const uint64_t mask = upperHalves[stage];
        const uint64_t upper = i & mask;
        i = upper ^ (upper >> width) ^ ((i << width) & mask);                   // For width 32 the mask is redundant but harmless
    }
    return i;
}

// -----------------------------------------------------------------------------
// 2D Sobol inversion -- 32bit
// -----------------------------------------------------------------------------

PBRT_CPU_GPU
inline uint32_t J32(uint32_t i) {
    // Delegates to ReverseBits32. A hand-rolled swap network is kept below
    // for reference only:
    /*
    i = (i << 16) | (i >> 16);
    i = ((i & 0x00FF00FF) << 8) | ((i & 0xFF00FF00) >> 8);
    i = ((i & 0x0F0F0F0F) << 4) | ((i & 0xF0F0F0F0) >> 4);
    i = ((i & 0x33333333) << 2) | ((i & 0xCCCCCCCC) >> 2);
    i = ((i & 0x55555555) << 1) | ((i & 0xAAAAAAAA) >> 1);
    return i;*/
    const uint32_t mirrored = ReverseBits32(i);
    return mirrored;
}

PBRT_CPU_GPU
inline uint32_t IP_inv (uint32_t v) {
    // Four masked XOR-shifts confined to the low 16 bits, applied in this
    // exact order, followed by folding the high half onto the low half.
    // NOTE(review): invIP describes this as the "[i:p]-inverse matrix".
    const uint32_t masks[4] = {0x3333, 0x5555, 0x0F0F, 0x00FF};
    const int shifts[4] = {2, 1, 4, 8};
    for (int k = 0; k < 4; ++k) {
        v ^= (v & masks[k]) << shifts[k];
    }
    return v ^ (v >> 16);
}

PBRT_CPU_GPU
inline uint32_t xi_u_inv(uint32_t v) {
    // Four stages with halving stride (8, 4, 2, 1). Each stage XORs in the
    // bits under a wide mask moved up by one stride and the bits under a
    // narrow mask moved up by two strides.
    const uint32_t wide[4]   = {0x0000FFFF, 0x00FF00FF, 0x0F0F0F0F, 0x33333333};
    const uint32_t narrow[4] = {0x000000FF, 0x000F000F, 0x03030303, 0x11111111};
    int stride = 8;
    for (int k = 0; k < 4; ++k, stride >>= 1) {
        const uint32_t oneStride = (v & wide[k]) << stride;
        const uint32_t twoStrides = (v & narrow[k]) << (2 * stride);
        v ^= oneStride ^ twoStrides;
    }
    return v;
}

PBRT_CPU_GPU
inline uint64_t Jxi_u_invJ(uint64_t v) {
    // Five stages with halving stride (16, 8, 4, 2, 1). Each stage XORs in
    // the bits under a wide mask moved down by one stride and the bits under
    // a narrow mask moved down by two strides.
    // NOTE(review): by its name presumably xi_u_inv conjugated by bit
    // reversal (J), widened to 64 bits -- confirm.
    const uint64_t wide[5] = {
        0xFFFFFFFF00000000llu, 0xFFFF0000FFFF0000llu, 0xFF00FF00FF00FF00llu,
        0xF0F0F0F0F0F0F0F0llu, 0xCCCCCCCCCCCCCCCCllu
    };
    const uint64_t narrow[5] = {
        0xFFFF000000000000llu, 0xFF000000FF000000llu, 0xF000F000F000F000llu,
        0xC0C0C0C0C0C0C0C0llu, 0x8888888888888888llu
    };
    int stride = 16;
    for (int k = 0; k < 5; ++k, stride >>= 1) {
        const uint64_t oneStride = (v & wide[k]) >> stride;
        const uint64_t twoStrides = (v & narrow[k]) >> (2 * stride);
        v ^= oneStride ^ twoStrides;
    }
    return v;
}

PBRT_CPU_GPU
inline uint64_t Jxi_uJ (uint64_t v) {
    // Five masked XOR-shifts with doubling stride (1, 2, 4, 8, 16): each
    // stage copies the upper half of every block onto the bits one stride
    // below.
    const uint64_t masks[5] = {
        0xCCCCCCCCCCCCCCCCllu, 0xF0F0F0F0F0F0F0F0llu, 0xFF00FF00FF00FF00llu,
        0xFFFF0000FFFF0000llu, 0xFFFFFFFF00000000llu
    };
    int stride = 1;
    for (int k = 0; k < 5; ++k, stride <<= 1) {
        v ^= (v & masks[k]) >> stride;
    }
    return v;
}

// Find the sequence index of the sample that lands in stratum (X, Y) of a
// 2^m x 2^m grid.
//   m        : bits per stratum coordinate; the 16-bit layout below requires
//              0 <= m <= 16. For m == 0 sampleNo is returned unchanged.
//   X, Y     : stratum coordinates, expected to fit in m bits each.
//   sampleNo : optional index placed above the 2*m stratum-indexing bits.
PBRT_CPU_GPU
inline uint64_t invIP(int m, uint32_t X, uint32_t Y, uint64_t sampleNo = 0) {
    if (m == 0) return sampleNo;
    int m2 = m << 1;
    uint32_t v = ((X << 16) | Y) << (16 - m);                                   // concat x:y as 16-bit fixed-point fractions
    sampleNo <<= m2;                                                            // Position sampleNo above pixel-indexing range of bits
    // -------------------------------------------------------------------------
    // Invert using [i:p]-inverse matrix.
    // This crude inversion gives the sample index of the pixel Origin
    // at 16-bit resolution, and will be refined subsequently
    // -------------------------------------------------------------------------
    v = IP_inv(v);
    // -------------------------------------------------------------------------
    // Translate to alpha-ordering so that all pixel samples have the same
    // suffix/prefix in normal/reversed order of bits.
    // -------------------------------------------------------------------------
    v = xi_u_inv(v);
    // -------------------------------------------------------------------------
    // Isolate the suffix/prefix bits for the pixel alpha index
    // -------------------------------------------------------------------------
    v = J32(v);                                                                 // Restore normal ordering of bits
    // Build the mask in 64 bits: with m == 16 a 32-bit (1 << 32) would be
    // undefined behavior.
    v &= static_cast<uint32_t>((uint64_t(1) << m2) - 1);                        // Isolate the 2*m-bits suffix
    // -------------------------------------------------------------------------
    // Adjust sampleNo to (I,P) ordering.
    // This is optional, ignoring it would reorder the samples within each
    // power-of-two prefix of the pixel samples.
    // We noticed that it does not change the rate, possibly thanks to
    // concurrent processing of the CPU.
    // -------------------------------------------------------------------------
    sampleNo = Jxi_u_invJ(sampleNo);
    sampleNo &= 0xFFFFFFFFFFFFFFFFllu << m2;                                    // Mask trailing bits.
    // -------------------------------------------------------------------------
    // Concatenate, bit-reverse, restore (I,P) ordering, and bit-reverse again
    // -------------------------------------------------------------------------
    return Jxi_uJ(v | sampleNo);                                                // Restore (I, P) ordering of the combined index
}

// -----------------------------------------------------------------------------
// Multiplication by 4x4-block Pascal matrix with bit-reversal to give PJ
// -----------------------------------------------------------------------------

PBRT_CPU_GPU
inline uint64_t PJ4(uint64_t i) {
    // -------------------------------------------------------------------------
    // The first four butterfly stages (block widths 32, 16, 8, 4) are the
    // same as in PJ64.
    // -------------------------------------------------------------------------
    const uint64_t upperHalves[4] = {
        0xFFFFFFFF00000000, 0xFFFF0000FFFF0000,
        0xFF00FF00FF00FF00, 0xF0F0F0F0F0F0F0F0
    };
    int width = 32;
    for (int stage = 0; stage < 4; ++stage, width >>= 1) {
        const uint64_t mask = upperHalves[stage];
        const uint64_t upper = i & mask;
        i = upper ^ (upper >> width) ^ ((i << width) & mask);
    }
    // -------------------------------------------------------------------------
    // The remaining two stages only reverse the bits inside each nibble:
    // swap adjacent bit pairs, then swap adjacent bits.
    // -------------------------------------------------------------------------
    const uint64_t lowPairs = 0x3333333333333333;
    const uint64_t evenBits = 0x5555555555555555;
    i = ((i & lowPairs) << 2) | ((i >> 2) & lowPairs);
    i = ((i & evenBits) << 1) | ((i >> 1) & evenBits);
    return i;
}

// -----------------------------------------------------------------------------
// Multiplication by 8x8-block Pascal matrix with bit-reversal to give PJ
// -----------------------------------------------------------------------------

PBRT_CPU_GPU
inline uint64_t PJ8(uint64_t i) {                                               // 8x8 block Pascal with bit reversal
    // -------------------------------------------------------------------------
    // The first three butterfly stages (block widths 32, 16, 8) are the same
    // as in PJ64.
    // -------------------------------------------------------------------------
    const uint64_t upperHalves[3] = {
        0xFFFFFFFF00000000, 0xFFFF0000FFFF0000, 0xFF00FF00FF00FF00
    };
    int width = 32;
    for (int stage = 0; stage < 3; ++stage, width >>= 1) {
        const uint64_t mask = upperHalves[stage];
        const uint64_t upper = i & mask;
        i = upper ^ (upper >> width) ^ ((i << width) & mask);
    }
    // -------------------------------------------------------------------------
    // The remaining three stages only reverse the bits inside each byte:
    // swap nibbles, then adjacent bit pairs, then adjacent bits.
    // -------------------------------------------------------------------------
    const uint64_t lowNibbles = 0x0F0F0F0F0F0F0F0F;
    const uint64_t lowPairs = 0x3333333333333333;
    const uint64_t evenBits = 0x5555555555555555;
    i = ((i & lowNibbles) << 4) | ((i >> 4) & lowNibbles);
    i = ((i & lowPairs) << 2) | ((i >> 2) & lowPairs);
    i = ((i & evenBits) << 1) | ((i >> 1) & evenBits);
    return i;
}

// -----------------------------------------------------------------------------
// Multiply by a matrix made of 4x4 diagonal blocks, stored as 7 diagonal masks
// -----------------------------------------------------------------------------

PBRT_CPU_GPU
inline uint64_t MxV4(const uint64_t *d, uint64_t x) {
    // d points to 7 masks. Mask k is ANDed with x displaced by (k - 3) bits
    // (negative: right shift, positive: left shift); all terms are XORed.
    uint64_t acc = 0;
    for (int k = 0; k < 7; ++k) {
        const int offset = k - 3;
        const uint64_t displaced = offset < 0 ? (x >> -offset) : (x << offset);
        acc ^= displaced & d[k];
    }
    return acc;
}

// -----------------------------------------------------------------------------
// Multiply by a matrix made of 8x8 diagonal blocks, stored as 15 diagonal masks
// -----------------------------------------------------------------------------

PBRT_CPU_GPU
inline uint64_t MxV8(const uint64_t *d, uint64_t x) {
    // d points to 15 masks. Mask k is ANDed with x displaced by (k - 7) bits
    // (negative: right shift, positive: left shift); all terms are XORed.
    uint64_t acc = 0;
    for (int k = 0; k < 15; ++k) {
        const int offset = k - 7;
        const uint64_t displaced = offset < 0 ? (x >> -offset) : (x << offset);
        acc ^= displaced & d[k];
    }
    return acc;
}

// -----------------------------------------------------------------------------
// Multiply by an SZ matrix using S-P-Z decomposition, 0 < dim < 16
// -----------------------------------------------------------------------------

PBRT_CPU_GPU
inline uint64_t mulFast4(const uint64_t *d, uint64_t seqNo) {
    // d holds 14 masks: the first 7 feed the inner MxV4, the next 7 the outer
    // one, with PJ4 applied in between.
    const uint64_t *innerDiagonals = d;
    const uint64_t *outerDiagonals = d + 7;
    const uint64_t inner = MxV4(innerDiagonals, seqNo);
    return MxV4(outerDiagonals, PJ4(inner));
}

PBRT_CPU_GPU
inline uint64_t mulFast8(const uint64_t *d, uint64_t seqNo) {
    // d holds 30 masks: the first 15 feed the inner MxV8, the next 15 the
    // outer one, with PJ8 applied in between.
    const uint64_t *innerDiagonals = d;
    const uint64_t *outerDiagonals = d + 15;
    const uint64_t inner = MxV8(innerDiagonals, seqNo);
    return MxV8(outerDiagonals, PJ8(inner));
}

// -----------------------------------------------------------------------------
// Compute a Float sample
// -----------------------------------------------------------------------------
PBRT_CPU_GPU
inline Float szSampleFloatFast(uint64_t seqNo, int dimension) {
    // Evaluate one coordinate of sample seqNo as a Float.
    // The dimension wraps modulo 256. Dimension 0 is the bit-reversed index,
    // dimensions 1..15 use 14 masks each (4x4 blocks), and dimensions >= 16
    // use 30 masks each (8x8 blocks), stored after the 15 * 14 masks of the
    // low dimensions.
    // The table is reached through a macro rather than a local pointer:
    //const uint64_t *diagonals = SZMatrices_O2_Ensembled_Upper_Diagonals_64bit;
    #define diagonals SZMatrices_O2_Ensembled_Upper_Diagonals_64bit
    dimension &= 255;
    uint64_t v;
    if (dimension == 0) {
        v = J64(seqNo);
    }
    else if (dimension < 16) {
        v = mulFast4(&diagonals[(dimension - 1) * 14], seqNo);
    }
    else {
        v = mulFast8(&diagonals[(dimension - 16) * 30 + 15*14], seqNo);
    }
    // Convert the top bits of the 64-bit fixed-point value to a Float that is
    // clamped strictly below 1.
    #ifdef PBRT_FLOAT_AS_DOUBLE
        v >>= 12;
        return std::min(
            v * (1.0 / (1ULL << SobolMatrixSize)), DoubleOneMinusEpsilon
        );
    #else
        v >>= 32;
        #ifndef PBRT_HAVE_HEX_FP_CONSTANTS
            return std::min(
                v * 2.3283064365386963e-10f /* 1/2^32 */, FloatOneMinusEpsilon
            );
        #else
            return std::min(v * 0x1p-32f /* 1/2^32 */, FloatOneMinusEpsilon);
        #endif
    #endif
    #undef diagonals
}

// =============================================================================
// SZZSampler Definition
// =============================================================================

// Sampler that orders pixels along a scrambled Morton (z-order) curve and
// draws every pixel's samples from consecutive indices of the SZ sequence.
// Base4Owen is a default-constructible callable mapping a 32-bit Morton index
// to a scrambled 32-bit index (e.g. QARTOwen, ZHash, Base4OwenIdentity).
template <typename Base4Owen>
class SZZSampler {
public:
    // SZZSampler Public Methods
    PBRT_CPU_GPU
    static constexpr const char *Name() { return "SZZSampler"; }

    // -------------------------------------------------------------------------
    // Build a sampler from scene parameters.
    // Reads "pixelsamples" (default 16), "seed" and "randomization" (default
    // "none"); command-line pixelSamples / quickRender options override the
    // sample count. Exits with an error on an unknown randomization string.
    // -------------------------------------------------------------------------
    static inline SZZSampler *Create(
        const ParameterDictionary &parameters,
        Point2i fullResolution,
        const FileLoc *loc,
        Allocator alloc
    ) {
        int nsamp = parameters.GetOneInt("pixelsamples", 16);
        if (Options->pixelSamples)
            nsamp = *Options->pixelSamples;
        if (Options->quickRender)
            nsamp = 1;
        int seed = parameters.GetOneInt("seed", Options->seed);

        RandomizeStrategy randomizer = RandomizeStrategy::None;
        //std::string s = parameters.GetOneString("randomization", "owen");
        std::string s = parameters.GetOneString("randomization", "none");
        if (s == "none")
            randomizer = RandomizeStrategy::None;
        else if (s == "permutedigits")
            randomizer = RandomizeStrategy::PermuteDigits;
        else if (s == "owen")
            randomizer = RandomizeStrategy::Owen;
        else
            ErrorExit(
                loc, "%s: unknown randomization strategy given to SZZSampler",
                s
            );
        return alloc.new_object<SZZSampler>(
            nsamp, fullResolution, randomizer, seed
        );
    }

    // -------------------------------------------------------------------------
    // Constructor. samplesPerPixel must be a power of two.
    // Members are initialized in declaration order.
    // -------------------------------------------------------------------------
    SZZSampler(
        int samplesPerPixel,
        Point2i fullResolution,
        RandomizeStrategy randomizer,
        int seed = 0
    ) : samplesPerPixel(samplesPerPixel), seed(seed), randomize(randomizer) {
        if (!IsPowerOf2(samplesPerPixel))
            ErrorExit("Must use power of 2 spp with SZZSampler: given %d",
                      samplesPerPixel);
        log2spp = Log2Int(samplesPerPixel);
        scale = RoundUpPow2(std::max(fullResolution.x, fullResolution.y));
    }

    PBRT_CPU_GPU
    int SamplesPerPixel() const { return samplesPerPixel; }

    // -------------------------------------------------------------------------
    // Setting up for a new sample
    // -------------------------------------------------------------------------
    //PBRT_CPU_GPU
    void StartPixelSample(Point2i p, int sampleIndex, int dim) {
        CHECK_EQ(dim, 0);
        pixel = p;
        // ---------------------------------------------------------------------
        // Compute global sample index
        // ---------------------------------------------------------------------
        //uint32_t pixelMortonIndex = (
        //    _pdep_u32(p.x, 0x55555555) | _pdep_u32(p.y, 0xAAAAAAAA)
        //);
        uint32_t pixelMortonIndex = EncodeMorton2(p.x, p.y);
        uint64_t pixelSeqNo = base4Owen(pixelMortonIndex);                       // This has to be uint64_t because sample indices are appended
        sampleSeqNo = (pixelSeqNo * samplesPerPixel) ^ sampleIndex;
        // ---------------------------------------------------------------------
        // Reset dimension pointers
        // ---------------------------------------------------------------------
        next1D = 0;
        next2D = 0;
    }

    // -------------------------------------------------------------------------
    // Retrieve 1D component of current sample
    // -------------------------------------------------------------------------
    PBRT_CPU_GPU
    Float Get1D() {
        Float x = SampleDimension(next1D);                                      // Retrieve the sample
        next1D++;                                                               // Advance pointer
        if (next1D & 1) { next2D = next1D + 1; }                                // When next1D becomes odd, it means we half-filled the front pair, hence advance 2D pointer to next pair
        else { next1D = next2D; }                                               // Pair fully shared, advance 1D pointer to front
        return x;
    }

    // -------------------------------------------------------------------------
    // Retrieve 2D component of current sample
    // -------------------------------------------------------------------------
    PBRT_CPU_GPU
    Point2f Get2D() {
        Point2f p(SampleDimension(next2D), SampleDimension(next2D + 1));
        if (next1D == next2D) { next1D += 2; }                                  // Advance 1D pointer as well if it is on the front.
        next2D += 2;
        return p;
    }

    // -------------------------------------------------------------------------
    // Retrieve on-pixel coordinates of current sample
    // -------------------------------------------------------------------------
    PBRT_CPU_GPU
    Point2f GetPixel2D() {
        int log2n = log2spp / 2;                                                // n is strata per row
        int log2nxn = 2 * log2n;                                                // nxn is number of strata
        int n = 1 << log2n;
        int nxn = 1 << log2nxn;
        int sampleNo = sampleSeqNo & (samplesPerPixel - 1);                     // Retrieve original sample number
        int Z = sampleNo & (nxn - 1);                                           // Stratum index
        //uint32_t X = _pext_u32(Z, 0x55555555);                                  // Stratum X
        //uint32_t Y = _pext_u32(Z, 0xAAAAAAAA);                                  // Stratum Y
        uint32_t X, Y;                                                          // Stratum X, Y
        DecodeMorton2(Z, &X, &Y);
        uint64_t netIndex = (sampleSeqNo >> log2nxn) << log2nxn;                // Separate shared net index of the log2nxn-points net
        // Undo the scrambling of the net, restoring Y of net0. With a single
        // stratum (log2n == 0) the shift would be by 64 bits, which is
        // undefined behavior; invIP ignores Y in that case, so skip it.
        uint32_t Y0 = Y;
        if (log2n > 0) {
            Y0 ^= PJ64(netIndex) >> (64 - log2n);
        }
        uint64_t sampleIndex = netIndex | invIP(log2n, X, Y0);                  // Retrieve index of sample falling in this stratum
        return {fxd2flt(J64(sampleIndex)), fxd2flt(PJ64(sampleIndex))};         // Retrieve coordinates
    }

    PBRT_CPU_GPU
    RandomizeStrategy GetRandomizeStrategy() const { return randomize; }

    Sampler Clone(Allocator alloc) {
        return alloc.new_object<SZZSampler>(*this);
    }
    std::string ToString() const { return "szzSampler"; }

private:
    // Every scalar member has a default so that copies made by Clone() never
    // read indeterminate values.
    mutable RNG rng;
    int samplesPerPixel = 0, scale = 0, seed = 0;
    int log2Resolution = 0, log2spp = 0;                                        // log2Resolution is not used in this class
    RandomizeStrategy randomize;
    Base4Owen base4Owen;
    Point2i pixel;
    int next2D = 0;                                                             // Pointer to next pair of dimension of the sequence
    int next1D = 0;                                                             // Next 1D dimension, used for pairing 1D components in 2D
    uint64_t sampleSeqNo = 0;                                                   // Global sequence number of current sample
    // -------------------------------------------------------------------------
    // Retrieve a specified dimension of a sample
    // -------------------------------------------------------------------------
    PBRT_CPU_GPU
    inline Float SampleDimension(int dim) const {
        return szSampleFloatFast(sampleSeqNo, dim);
    }
    // -------------------------------------------------------------------------
    // Fixed to Float conversion: keeps the top bits of a 64-bit fixed-point
    // fraction and clamps the result strictly below 1.
    // -------------------------------------------------------------------------
    PBRT_CPU_GPU
    inline Float fxd2flt(uint64_t v) {
        #ifdef PBRT_FLOAT_AS_DOUBLE
        v >>= 12;
        return std::min(
            v * (1.0 / (1ULL << SobolMatrixSize)), DoubleOneMinusEpsilon
        );
        #else
        v >>= 32;
        #ifndef PBRT_HAVE_HEX_FP_CONSTANTS
        return std::min(
            v * 2.3283064365386963e-10f /* 1/2^32 */,
            FloatOneMinusEpsilon
        );
        #else
        return std::min(
            v * 0x1p-32f /* 1/2^32 */,
            FloatOneMinusEpsilon
        );
        #endif
        #endif
    }
};



}                                                                               // namespace pbrt

#endif                                                                          // #ifndef PBRT_SZ_SAMPLERS_H
