proconlib

This documentation is automatically generated by online-judge-tools/verification-helper

View the Project on GitHub KodamaD/proconlib

:heavy_check_mark: internal/modulo_transform.cpp

Depends on

Required by

Verified with

Code

#pragma once
#include <array>
#include <vector>
#include "../math/primitive_root.cpp"
#include "../utility/countr_zero.cpp"
#include "../utility/int_alias.cpp"
#include "../utility/rep.cpp"
#include "../utility/revrep.cpp"

namespace proconlib {

template <class M> struct ModuloTransform {
    static constexpr u32 MOD = M::mod();
    static constexpr u32 ROOT = primitive_root(MOD);
    static constexpr int RANK = countr_zero(MOD - 1);

    std::array<M, RANK + 1> root, iroot;
    std::array<M, (RANK >= 2 ? RANK - 2 + 1 : 0)> rate2, irate2;
    std::array<M, (RANK >= 3 ? RANK - 3 + 1 : 0)> rate3, irate3;

    constexpr ModuloTransform() : root(), iroot(), rate2(), irate2(), rate3(), irate3() {
        root[RANK] = M(ROOT).pow((MOD - 1) >> RANK);
        iroot[RANK] = root[RANK].inv();
        for (const int i : revrep(0, RANK)) {
            root[i] = root[i + 1] * root[i + 1];
            iroot[i] = iroot[i + 1] * iroot[i + 1];
        }
        M prod = 1, iprod = 1;
        for (const int i : rep(2, RANK + 1)) {
            rate2[i - 2] = root[i] * prod;
            irate2[i - 2] = iroot[i] * iprod;
            prod *= iroot[i];
            iprod *= root[i];
        }
        prod = 1, iprod = 1;
        for (const int i : rep(3, RANK + 1)) {
            rate3[i - 3] = root[i] * prod;
            irate3[i - 3] = iroot[i] * iprod;
            prod *= iroot[i];
            iprod *= root[i];
        }
    }

    void butterfly(std::vector<M>& a) const {
        const int n = a.size();
        const int h = countr_zero(n);
        for (int len = 0; len < h;) {
            if (len + 1 == h) {
                M rot = 1;
                for (const int s : rep(0, 1 << len)) {
                    const int t = s << 1;
                    const M l = a[t], r = a[t + 1] * rot;
                    a[t] = l + r;
                    a[t + 1] = l - r;
                    if (((s + 1) >> len) == 0) rot *= rate2[countr_zero(~s)];
                }
                len += 1;
            } else {
                const int p = 1 << (h - len - 2);
                M rot = 1;
                for (const int s : rep(0, 1 << len)) {
                    const int t = s << (h - len);
                    const M rot2 = rot * rot;
                    const M rot3 = rot2 * rot;
                    for (const int i : rep(0, p)) {
                        const M a0 = a[i + t];
                        const M a1 = a[i + t + p] * rot;
                        const M a2 = a[i + t + p * 2] * rot2;
                        const M a3 = a[i + t + p * 3] * rot3;
                        const M ax = (a1 - a3) * root[2];
                        a[i + t] = a0 + a1 + a2 + a3;
                        a[i + t + p] = a0 - a1 + a2 - a3;
                        a[i + t + p * 2] = a0 - a2 + ax;
                        a[i + t + p * 3] = a0 - a2 - ax;
                    }
                    if (((s + 1) >> len) == 0) rot *= rate3[countr_zero(~s)];
                }
                len += 2;
            }
        }
    }

    void butterfly_inv(std::vector<M>& a) const {
        const int n = a.size();
        const int h = countr_zero(n);
        for (int len = h; len > 0;) {
            if (len == 1) {
                const int p = n >> 1;
                for (const int i : rep(0, p)) {
                    const M l = a[i], r = a[i + p];
                    a[i] = l + r;
                    a[i + p] = l - r;
                }
                len -= 1;
            } else {
                const int p = 1 << (h - len);
                M rot = 1;
                for (const int s : rep(0, 1 << (len - 2))) {
                    const int t = s << (h - len + 2);
                    const M rot2 = rot * rot;
                    const M rot3 = rot2 * rot;
                    for (const int i : rep(0, p)) {
                        const M a0 = a[i + t];
                        const M a1 = a[i + t + p];
                        const M a2 = a[i + t + p * 2];
                        const M a3 = a[i + t + p * 3];
                        const M ax = (a2 - a3) * iroot[2];
                        a[i + t] = a0 + a1 + a2 + a3;
                        a[i + t + p] = (a0 - a1 + ax) * rot;
                        a[i + t + p * 2] = (a0 + a1 - a2 - a3) * rot2;
                        a[i + t + p * 3] = (a0 - a1 - ax) * rot3;
                    }
                    if (((s + 1) >> (len - 2)) == 0) rot *= irate3[countr_zero(~s)];
                }
                len -= 2;
            }
        }
    }
};

}  // namespace proconlib
#line 2 "internal/modulo_transform.cpp"
#include <array>
#include <vector>
#line 2 "utility/int_alias.cpp"
#include <cstdint>

using i32 = std::int32_t;
using u32 = std::uint32_t;
using i64 = std::int64_t;
using u64 = std::uint64_t;
using i128 = __int128_t;
using u128 = __uint128_t;
#line 2 "math/mod_pow.cpp"
#include <cassert>
#include <type_traits>
#line 3 "internal/barret_reduction.cpp"

namespace proconlib {

class BarretReduction {
    u32 mod;
    u64 near_inv;

  public:
    explicit constexpr BarretReduction(const u32 mod) noexcept : mod(mod), near_inv((u64)(-1) / mod + 1) {}
    constexpr u32 product(const u32 a, const u32 b) const noexcept {
        const u64 z = (u64)a * b;
        const u64 x = ((u128)z * near_inv) >> 64;
        const u32 v = z - x * mod;
        return v < mod ? v : v + mod;
    }
    constexpr u32 get_mod() const noexcept { return mod; }
};

}  // namespace proconlib
#line 3 "math/rem_euclid.cpp"

template <class T> constexpr T rem_euclid(T value, const T& mod) {
    assert(mod > 0);
    return (value %= mod) >= 0 ? value : value + mod;
}
#line 7 "math/mod_pow.cpp"

template <class T> constexpr u32 mod_pow(T x, u64 exp, const u32 mod) {
    assert(mod > 0);
    if (mod == 1) return 0;
    const proconlib::BarretReduction bt(mod);
    u32 ret = 1, mul = rem_euclid<std::common_type_t<T, i64>>(x, mod);
    for (; exp > 0; exp >>= 1) {
        if (exp & 1) ret = bt.product(ret, mul);
        mul = bt.product(mul, mul);
    }
    return ret;
}
#line 5 "math/primitive_root.cpp"

constexpr u32 primitive_root(const u32 mod) {
    std::array<u32, 32> exp{};
    u32 cur = mod - 1;
    int size = 0;
    for (u32 i = 2; i * i <= cur; ++i) {
        if (cur % i == 0) {
            exp[size++] = (mod - 1) / i;
            while (cur % i == 0) cur /= i;
        }
    }
    if (cur != 1) exp[size++] = (mod - 1) / cur;
    for (u32 check = 1; check < mod; ++check) {
        for (const u32 e : exp) {
            if (e == 0) return check;
            if (mod_pow(check, e, mod) == 1) break;
        }
    }
    return mod;
}
#line 2 "internal/enable_avx2.cpp"

#ifdef ENABLE_AVX2
#define TARGET_AVX2 __attribute__((target("avx2")))
#else
#define TARGET_AVX2
#endif
#line 5 "utility/countr_zero.cpp"

constexpr int countr_zero(u64 x) {
    if (x == 0) return 64;
#ifdef __GNUC__
    return __builtin_ctzll(x);
#else
    constexpr std::array<int, 64> table = {0,  1,  2,  7,  3,  13, 8,  27, 4,  33, 14, 36, 9,  49, 28, 19,
                                             5,  25, 34, 17, 15, 53, 37, 55, 10, 46, 50, 39, 29, 42, 20, 57,
                                             63, 6,  12, 26, 32, 35, 48, 18, 24, 16, 52, 54, 45, 38, 41, 56,
                                             62, 11, 31, 47, 23, 51, 44, 40, 61, 30, 22, 43, 60, 21, 59, 58};
    return table[(x & (~x + 1)) * 0x218A7A392DD9ABF >> 58 & 0x3F];
#endif
}
#line 2 "utility/rep.cpp"
#include <algorithm>

class Range {
    struct Iter {
        int itr;
        constexpr Iter(const int pos) noexcept : itr(pos) {}
        constexpr void operator++() noexcept { ++itr; }
        constexpr bool operator!=(const Iter& other) const noexcept { return itr != other.itr; }
        constexpr int operator*() const noexcept { return itr; }
    };
    const Iter first, last;

  public:
    explicit constexpr Range(const int first, const int last) noexcept : first(first), last(std::max(first, last)) {}
    constexpr Iter begin() const noexcept { return first; }
    constexpr Iter end() const noexcept { return last; }
};

constexpr Range rep(const int l, const int r) noexcept { return Range(l, r); }
constexpr Range rep(const int n) noexcept { return Range(0, n); }
#line 3 "utility/revrep.cpp"

class ReversedRange {
    struct Iter {
        int itr;
        constexpr Iter(const int pos) noexcept : itr(pos) {}
        constexpr void operator++() noexcept { --itr; }
        constexpr bool operator!=(const Iter& other) const noexcept { return itr != other.itr; }
        constexpr int operator*() const noexcept { return itr; }
    };
    const Iter first, last;

  public:
    explicit constexpr ReversedRange(const int first, const int last) noexcept
        : first(last - 1), last(std::min(first, last) - 1) {}
    constexpr Iter begin() const noexcept { return first; }
    constexpr Iter end() const noexcept { return last; }
};

constexpr ReversedRange revrep(const int l, const int r) noexcept { return ReversedRange(l, r); }
constexpr ReversedRange revrep(const int n) noexcept { return ReversedRange(0, n); }
#line 9 "internal/modulo_transform.cpp"

namespace proconlib {

template <class M> struct ModuloTransform {
    static constexpr u32 MOD = M::mod();
    static constexpr u32 ROOT = primitive_root(MOD);
    static constexpr int RANK = countr_zero(MOD - 1);

    std::array<M, RANK + 1> root, iroot;
    std::array<M, (RANK >= 2 ? RANK - 2 + 1 : 0)> rate2, irate2;
    std::array<M, (RANK >= 3 ? RANK - 3 + 1 : 0)> rate3, irate3;

    constexpr ModuloTransform() : root(), iroot(), rate2(), irate2(), rate3(), irate3() {
        root[RANK] = M(ROOT).pow((MOD - 1) >> RANK);
        iroot[RANK] = root[RANK].inv();
        for (const int i : revrep(0, RANK)) {
            root[i] = root[i + 1] * root[i + 1];
            iroot[i] = iroot[i + 1] * iroot[i + 1];
        }
        M prod = 1, iprod = 1;
        for (const int i : rep(2, RANK + 1)) {
            rate2[i - 2] = root[i] * prod;
            irate2[i - 2] = iroot[i] * iprod;
            prod *= iroot[i];
            iprod *= root[i];
        }
        prod = 1, iprod = 1;
        for (const int i : rep(3, RANK + 1)) {
            rate3[i - 3] = root[i] * prod;
            irate3[i - 3] = iroot[i] * iprod;
            prod *= iroot[i];
            iprod *= root[i];
        }
    }

    void butterfly(std::vector<M>& a) const {
        const int n = a.size();
        const int h = countr_zero(n);
        for (int len = 0; len < h;) {
            if (len + 1 == h) {
                M rot = 1;
                for (const int s : rep(0, 1 << len)) {
                    const int t = s << 1;
                    const M l = a[t], r = a[t + 1] * rot;
                    a[t] = l + r;
                    a[t + 1] = l - r;
                    if (((s + 1) >> len) == 0) rot *= rate2[countr_zero(~s)];
                }
                len += 1;
            } else {
                const int p = 1 << (h - len - 2);
                M rot = 1;
                for (const int s : rep(0, 1 << len)) {
                    const int t = s << (h - len);
                    const M rot2 = rot * rot;
                    const M rot3 = rot2 * rot;
                    for (const int i : rep(0, p)) {
                        const M a0 = a[i + t];
                        const M a1 = a[i + t + p] * rot;
                        const M a2 = a[i + t + p * 2] * rot2;
                        const M a3 = a[i + t + p * 3] * rot3;
                        const M ax = (a1 - a3) * root[2];
                        a[i + t] = a0 + a1 + a2 + a3;
                        a[i + t + p] = a0 - a1 + a2 - a3;
                        a[i + t + p * 2] = a0 - a2 + ax;
                        a[i + t + p * 3] = a0 - a2 - ax;
                    }
                    if (((s + 1) >> len) == 0) rot *= rate3[countr_zero(~s)];
                }
                len += 2;
            }
        }
    }

    void butterfly_inv(std::vector<M>& a) const {
        const int n = a.size();
        const int h = countr_zero(n);
        for (int len = h; len > 0;) {
            if (len == 1) {
                const int p = n >> 1;
                for (const int i : rep(0, p)) {
                    const M l = a[i], r = a[i + p];
                    a[i] = l + r;
                    a[i + p] = l - r;
                }
                len -= 1;
            } else {
                const int p = 1 << (h - len);
                M rot = 1;
                for (const int s : rep(0, 1 << (len - 2))) {
                    const int t = s << (h - len + 2);
                    const M rot2 = rot * rot;
                    const M rot3 = rot2 * rot;
                    for (const int i : rep(0, p)) {
                        const M a0 = a[i + t];
                        const M a1 = a[i + t + p];
                        const M a2 = a[i + t + p * 2];
                        const M a3 = a[i + t + p * 3];
                        const M ax = (a2 - a3) * iroot[2];
                        a[i + t] = a0 + a1 + a2 + a3;
                        a[i + t + p] = (a0 - a1 + ax) * rot;
                        a[i + t + p * 2] = (a0 + a1 - a2 - a3) * rot2;
                        a[i + t + p * 3] = (a0 - a1 - ax) * rot3;
                    }
                    if (((s + 1) >> (len - 2)) == 0) rot *= irate3[countr_zero(~s)];
                }
                len -= 2;
            }
        }
    }
};

}  // namespace proconlib
Back to top page