This documentation is automatically generated by online-judge-tools/verification-helper
#pragma once
#include <array>
#include <vector>
#include "../math/primitive_root.cpp"
#include "../utility/countr_zero.cpp"
#include "../utility/int_alias.cpp"
#include "../utility/rep.cpp"
#include "../utility/revrep.cpp"
namespace proconlib {
template <class M> struct ModuloTransform {
static constexpr u32 MOD = M::mod();
static constexpr u32 ROOT = primitive_root(MOD);
static constexpr int RANK = countr_zero(MOD - 1);
std::array<M, RANK + 1> root, iroot;
std::array<M, (RANK >= 2 ? RANK - 2 + 1 : 0)> rate2, irate2;
std::array<M, (RANK >= 3 ? RANK - 3 + 1 : 0)> rate3, irate3;
constexpr ModuloTransform() : root(), iroot(), rate2(), irate2(), rate3(), irate3() {
root[RANK] = M(ROOT).pow((MOD - 1) >> RANK);
iroot[RANK] = root[RANK].inv();
for (const int i : revrep(0, RANK)) {
root[i] = root[i + 1] * root[i + 1];
iroot[i] = iroot[i + 1] * iroot[i + 1];
}
M prod = 1, iprod = 1;
for (const int i : rep(2, RANK + 1)) {
rate2[i - 2] = root[i] * prod;
irate2[i - 2] = iroot[i] * iprod;
prod *= iroot[i];
iprod *= root[i];
}
prod = 1, iprod = 1;
for (const int i : rep(3, RANK + 1)) {
rate3[i - 3] = root[i] * prod;
irate3[i - 3] = iroot[i] * iprod;
prod *= iroot[i];
iprod *= root[i];
}
}
void butterfly(std::vector<M>& a) const {
const int n = a.size();
const int h = countr_zero(n);
for (int len = 0; len < h;) {
if (len + 1 == h) {
M rot = 1;
for (const int s : rep(0, 1 << len)) {
const int t = s << 1;
const M l = a[t], r = a[t + 1] * rot;
a[t] = l + r;
a[t + 1] = l - r;
if (((s + 1) >> len) == 0) rot *= rate2[countr_zero(~s)];
}
len += 1;
} else {
const int p = 1 << (h - len - 2);
M rot = 1;
for (const int s : rep(0, 1 << len)) {
const int t = s << (h - len);
const M rot2 = rot * rot;
const M rot3 = rot2 * rot;
for (const int i : rep(0, p)) {
const M a0 = a[i + t];
const M a1 = a[i + t + p] * rot;
const M a2 = a[i + t + p * 2] * rot2;
const M a3 = a[i + t + p * 3] * rot3;
const M ax = (a1 - a3) * root[2];
a[i + t] = a0 + a1 + a2 + a3;
a[i + t + p] = a0 - a1 + a2 - a3;
a[i + t + p * 2] = a0 - a2 + ax;
a[i + t + p * 3] = a0 - a2 - ax;
}
if (((s + 1) >> len) == 0) rot *= rate3[countr_zero(~s)];
}
len += 2;
}
}
}
void butterfly_inv(std::vector<M>& a) const {
const int n = a.size();
const int h = countr_zero(n);
for (int len = h; len > 0;) {
if (len == 1) {
const int p = n >> 1;
for (const int i : rep(0, p)) {
const M l = a[i], r = a[i + p];
a[i] = l + r;
a[i + p] = l - r;
}
len -= 1;
} else {
const int p = 1 << (h - len);
M rot = 1;
for (const int s : rep(0, 1 << (len - 2))) {
const int t = s << (h - len + 2);
const M rot2 = rot * rot;
const M rot3 = rot2 * rot;
for (const int i : rep(0, p)) {
const M a0 = a[i + t];
const M a1 = a[i + t + p];
const M a2 = a[i + t + p * 2];
const M a3 = a[i + t + p * 3];
const M ax = (a2 - a3) * iroot[2];
a[i + t] = a0 + a1 + a2 + a3;
a[i + t + p] = (a0 - a1 + ax) * rot;
a[i + t + p * 2] = (a0 + a1 - a2 - a3) * rot2;
a[i + t + p * 3] = (a0 - a1 - ax) * rot3;
}
if (((s + 1) >> (len - 2)) == 0) rot *= irate3[countr_zero(~s)];
}
len -= 2;
}
}
}
};
} // namespace proconlib
#line 2 "internal/modulo_transform.cpp"
#include <array>
#include <vector>
#line 2 "utility/int_alias.cpp"
#include <cstdint>
using i32 = std::int32_t;
using u32 = std::uint32_t;
using i64 = std::int64_t;
using u64 = std::uint64_t;
using i128 = __int128_t;
using u128 = __uint128_t;
#line 2 "math/mod_pow.cpp"
#include <cassert>
#include <type_traits>
#line 3 "internal/barret_reduction.cpp"
namespace proconlib {
class BarretReduction {
u32 mod;
u64 near_inv;
public:
explicit constexpr BarretReduction(const u32 mod) noexcept : mod(mod), near_inv((u64)(-1) / mod + 1) {}
constexpr u32 product(const u32 a, const u32 b) const noexcept {
const u64 z = (u64)a * b;
const u64 x = ((u128)z * near_inv) >> 64;
const u32 v = z - x * mod;
return v < mod ? v : v + mod;
}
constexpr u32 get_mod() const noexcept { return mod; }
};
} // namespace proconlib
#line 3 "math/rem_euclid.cpp"
template <class T> constexpr T rem_euclid(T value, const T& mod) {
assert(mod > 0);
return (value %= mod) >= 0 ? value : value + mod;
}
#line 7 "math/mod_pow.cpp"
template <class T> constexpr u32 mod_pow(T x, u64 exp, const u32 mod) {
assert(mod > 0);
if (mod == 1) return 0;
const proconlib::BarretReduction bt(mod);
u32 ret = 1, mul = rem_euclid<std::common_type_t<T, i64>>(x, mod);
for (; exp > 0; exp >>= 1) {
if (exp & 1) ret = bt.product(ret, mul);
mul = bt.product(mul, mul);
}
return ret;
}
#line 5 "math/primitive_root.cpp"
constexpr u32 primitive_root(const u32 mod) {
std::array<u32, 32> exp{};
u32 cur = mod - 1;
int size = 0;
for (u32 i = 2; i * i <= cur; ++i) {
if (cur % i == 0) {
exp[size++] = (mod - 1) / i;
while (cur % i == 0) cur /= i;
}
}
if (cur != 1) exp[size++] = (mod - 1) / cur;
for (u32 check = 1; check < mod; ++check) {
for (const u32 e : exp) {
if (e == 0) return check;
if (mod_pow(check, e, mod) == 1) break;
}
}
return mod;
}
#line 2 "internal/enable_avx2.cpp"
#ifdef ENABLE_AVX2
#define TARGET_AVX2 __attribute__((target("avx2")))
#else
#define TARGET_AVX2
#endif
#line 5 "utility/countr_zero.cpp"
constexpr int countr_zero(u64 x) {
if (x == 0) return 64;
#ifdef __GNUC__
return __builtin_ctzll(x);
#else
constexpr std::array<int, 64> table = {0, 1, 2, 7, 3, 13, 8, 27, 4, 33, 14, 36, 9, 49, 28, 19,
5, 25, 34, 17, 15, 53, 37, 55, 10, 46, 50, 39, 29, 42, 20, 57,
63, 6, 12, 26, 32, 35, 48, 18, 24, 16, 52, 54, 45, 38, 41, 56,
62, 11, 31, 47, 23, 51, 44, 40, 61, 30, 22, 43, 60, 21, 59, 58};
return table[(x & (~x + 1)) * 0x218A7A392DD9ABF >> 58 & 0x3F];
#endif
}
#line 2 "utility/rep.cpp"
#include <algorithm>
class Range {
struct Iter {
int itr;
constexpr Iter(const int pos) noexcept : itr(pos) {}
constexpr void operator++() noexcept { ++itr; }
constexpr bool operator!=(const Iter& other) const noexcept { return itr != other.itr; }
constexpr int operator*() const noexcept { return itr; }
};
const Iter first, last;
public:
explicit constexpr Range(const int first, const int last) noexcept : first(first), last(std::max(first, last)) {}
constexpr Iter begin() const noexcept { return first; }
constexpr Iter end() const noexcept { return last; }
};
constexpr Range rep(const int l, const int r) noexcept { return Range(l, r); }
constexpr Range rep(const int n) noexcept { return Range(0, n); }
#line 3 "utility/revrep.cpp"
class ReversedRange {
struct Iter {
int itr;
constexpr Iter(const int pos) noexcept : itr(pos) {}
constexpr void operator++() noexcept { --itr; }
constexpr bool operator!=(const Iter& other) const noexcept { return itr != other.itr; }
constexpr int operator*() const noexcept { return itr; }
};
const Iter first, last;
public:
explicit constexpr ReversedRange(const int first, const int last) noexcept
: first(last - 1), last(std::min(first, last) - 1) {}
constexpr Iter begin() const noexcept { return first; }
constexpr Iter end() const noexcept { return last; }
};
constexpr ReversedRange revrep(const int l, const int r) noexcept { return ReversedRange(l, r); }
constexpr ReversedRange revrep(const int n) noexcept { return ReversedRange(0, n); }
#line 9 "internal/modulo_transform.cpp"
namespace proconlib {
template <class M> struct ModuloTransform {
static constexpr u32 MOD = M::mod();
static constexpr u32 ROOT = primitive_root(MOD);
static constexpr int RANK = countr_zero(MOD - 1);
std::array<M, RANK + 1> root, iroot;
std::array<M, (RANK >= 2 ? RANK - 2 + 1 : 0)> rate2, irate2;
std::array<M, (RANK >= 3 ? RANK - 3 + 1 : 0)> rate3, irate3;
constexpr ModuloTransform() : root(), iroot(), rate2(), irate2(), rate3(), irate3() {
root[RANK] = M(ROOT).pow((MOD - 1) >> RANK);
iroot[RANK] = root[RANK].inv();
for (const int i : revrep(0, RANK)) {
root[i] = root[i + 1] * root[i + 1];
iroot[i] = iroot[i + 1] * iroot[i + 1];
}
M prod = 1, iprod = 1;
for (const int i : rep(2, RANK + 1)) {
rate2[i - 2] = root[i] * prod;
irate2[i - 2] = iroot[i] * iprod;
prod *= iroot[i];
iprod *= root[i];
}
prod = 1, iprod = 1;
for (const int i : rep(3, RANK + 1)) {
rate3[i - 3] = root[i] * prod;
irate3[i - 3] = iroot[i] * iprod;
prod *= iroot[i];
iprod *= root[i];
}
}
void butterfly(std::vector<M>& a) const {
const int n = a.size();
const int h = countr_zero(n);
for (int len = 0; len < h;) {
if (len + 1 == h) {
M rot = 1;
for (const int s : rep(0, 1 << len)) {
const int t = s << 1;
const M l = a[t], r = a[t + 1] * rot;
a[t] = l + r;
a[t + 1] = l - r;
if (((s + 1) >> len) == 0) rot *= rate2[countr_zero(~s)];
}
len += 1;
} else {
const int p = 1 << (h - len - 2);
M rot = 1;
for (const int s : rep(0, 1 << len)) {
const int t = s << (h - len);
const M rot2 = rot * rot;
const M rot3 = rot2 * rot;
for (const int i : rep(0, p)) {
const M a0 = a[i + t];
const M a1 = a[i + t + p] * rot;
const M a2 = a[i + t + p * 2] * rot2;
const M a3 = a[i + t + p * 3] * rot3;
const M ax = (a1 - a3) * root[2];
a[i + t] = a0 + a1 + a2 + a3;
a[i + t + p] = a0 - a1 + a2 - a3;
a[i + t + p * 2] = a0 - a2 + ax;
a[i + t + p * 3] = a0 - a2 - ax;
}
if (((s + 1) >> len) == 0) rot *= rate3[countr_zero(~s)];
}
len += 2;
}
}
}
void butterfly_inv(std::vector<M>& a) const {
const int n = a.size();
const int h = countr_zero(n);
for (int len = h; len > 0;) {
if (len == 1) {
const int p = n >> 1;
for (const int i : rep(0, p)) {
const M l = a[i], r = a[i + p];
a[i] = l + r;
a[i + p] = l - r;
}
len -= 1;
} else {
const int p = 1 << (h - len);
M rot = 1;
for (const int s : rep(0, 1 << (len - 2))) {
const int t = s << (h - len + 2);
const M rot2 = rot * rot;
const M rot3 = rot2 * rot;
for (const int i : rep(0, p)) {
const M a0 = a[i + t];
const M a1 = a[i + t + p];
const M a2 = a[i + t + p * 2];
const M a3 = a[i + t + p * 3];
const M ax = (a2 - a3) * iroot[2];
a[i + t] = a0 + a1 + a2 + a3;
a[i + t + p] = (a0 - a1 + ax) * rot;
a[i + t + p * 2] = (a0 + a1 - a2 - a3) * rot2;
a[i + t + p * 3] = (a0 - a1 - ax) * rot3;
}
if (((s + 1) >> (len - 2)) == 0) rot *= irate3[countr_zero(~s)];
}
len -= 2;
}
}
}
};
} // namespace proconlib