algorithm/convolution_int.cpp

View this file on GitHub
Last update: 2022-01-28 13:07:07+09:00

Depends on

Code

#pragma once
#include <algorithm>
#include <type_traits>
#include <vector>
#include "../math/mod_inv.cpp"
#include "../math/rem_euclid.cpp"
#include "../utility/int_alias.cpp"
#include "../utility/rep.cpp"
#include "convolution_mod.cpp"

template <class T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
std::vector<T> convolution_int(const std::vector<T>& a, const std::vector<T>& b) {
    const int n = a.size(), m = b.size();
    if (n == 0 || m == 0) return {};
    static constexpr u64 MOD1 = 754974721, MOD2 = 167772161, MOD3 = 469762049;
    static constexpr u64 M2M3 = MOD2 * MOD3, M1M3 = MOD1 * MOD3, M1M2 = MOD1 * MOD2;
    static constexpr u64 M1M2M3 = MOD1 * MOD2 * MOD3;
    static constexpr u64 I1 = mod_inv(MOD2 * MOD3, MOD1);
    static constexpr u64 I2 = mod_inv(MOD1 * MOD3, MOD2);
    static constexpr u64 I3 = mod_inv(MOD1 * MOD2, MOD3);
    static constexpr u64 OFFSET[5] = {0, 0, M1M2M3, 2 * M1M2M3, 3 * M1M2M3};
    std::vector<T> c1 = convolution_mod<T, MOD1>(a, b);
    std::vector<T> c2 = convolution_mod<T, MOD2>(a, b);
    std::vector<T> c3 = convolution_mod<T, MOD3>(a, b);
    std::vector<T> c(n + m - 1);
    for (const int i : rep(n + m - 1)) {
        u64 x = 0;
        x += (c1[i] * I1) % MOD1 * M2M3;
        x += (c2[i] * I2) % MOD2 * M1M3;
        x += (c3[i] * I3) % MOD3 * M1M2;
        i64 diff = c1[i] - rem_euclid<i64>(x, MOD1);
        if (diff < 0) diff += MOD1;
        c[i] = x - OFFSET[diff % 5];
    }
    return c;
}

#line 2 "algorithm/convolution_int.cpp"
#include <algorithm>
#include <type_traits>
#include <vector>
#line 2 "math/mod_inv.cpp"
#include <cassert>
#line 3 "math/inv_gcd.cpp"
#include <utility>
#line 3 "math/rem_euclid.cpp"

template <class T> constexpr T rem_euclid(T value, const T& mod) {
    assert(mod > 0);
    return (value %= mod) >= 0 ? value : value + mod;
}
#line 5 "math/inv_gcd.cpp"

template <class T> constexpr std::pair<T, T> inv_gcd(const T& a, const T& b) {
    using U = std::make_signed_t<T>;
    U t = rem_euclid(a, b);
    if (t == 0) return {b, 0};
    U s = b, m0 = 0, m1 = 1;
    while (t != 0) {
        const U u = s / t;
        s -= t * u;
        m0 -= m1 * u;
        U tmp = s;
        s = t;
        t = tmp;
        tmp = m0;
        m0 = m1;
        m1 = tmp;
    }
    if (m0 < 0) m0 += b / s;
    return {(T)s, (T)m0};
}
#line 4 "math/mod_inv.cpp"

template <class T> constexpr T mod_inv(const T& a, const T& mod) {
    const auto [g, x] = inv_gcd(a, mod);
    assert(g == 1);
    return x;
}
#line 2 "utility/int_alias.cpp"
#include <cstdint>

using i32 = std::int32_t;
using u32 = std::uint32_t;
using i64 = std::int64_t;
using u64 = std::uint64_t;
using i128 = __int128_t;
using u128 = __uint128_t;
#line 3 "utility/rep.cpp"

class Range {
    struct Iter {
        int itr;
        constexpr Iter(const int pos) noexcept : itr(pos) {}
        constexpr void operator++() noexcept { ++itr; }
        constexpr bool operator!=(const Iter& other) const noexcept { return itr != other.itr; }
        constexpr int operator*() const noexcept { return itr; }
    };
    const Iter first, last;

  public:
    explicit constexpr Range(const int first, const int last) noexcept : first(first), last(std::max(first, last)) {}
    constexpr Iter begin() const noexcept { return first; }
    constexpr Iter end() const noexcept { return last; }
};

constexpr Range rep(const int l, const int r) noexcept { return Range(l, r); }
constexpr Range rep(const int n) noexcept { return Range(0, n); }
#line 2 "internal/modulo_transform.cpp"
#include <array>
#line 3 "internal/barret_reduction.cpp"

namespace proconlib {

class BarretReduction {
    u32 mod;
    u64 near_inv;

  public:
    explicit constexpr BarretReduction(const u32 mod) noexcept : mod(mod), near_inv((u64)(-1) / mod + 1) {}
    constexpr u32 product(const u32 a, const u32 b) const noexcept {
        const u64 z = (u64)a * b;
        const u64 x = ((u128)z * near_inv) >> 64;
        const u32 v = z - x * mod;
        return v < mod ? v : v + mod;
    }
    constexpr u32 get_mod() const noexcept { return mod; }
};

}  // namespace proconlib
#line 7 "math/mod_pow.cpp"

template <class T> constexpr u32 mod_pow(T x, u64 exp, const u32 mod) {
    assert(mod > 0);
    if (mod == 1) return 0;
    const proconlib::BarretReduction bt(mod);
    u32 ret = 1, mul = rem_euclid<std::common_type_t<T, i64>>(x, mod);
    for (; exp > 0; exp >>= 1) {
        if (exp & 1) ret = bt.product(ret, mul);
        mul = bt.product(mul, mul);
    }
    return ret;
}
#line 5 "math/primitive_root.cpp"

constexpr u32 primitive_root(const u32 mod) {
    std::array<u32, 32> exp{};
    u32 cur = mod - 1;
    int size = 0;
    for (u32 i = 2; i * i <= cur; ++i) {
        if (cur % i == 0) {
            exp[size++] = (mod - 1) / i;
            while (cur % i == 0) cur /= i;
        }
    }
    if (cur != 1) exp[size++] = (mod - 1) / cur;
    for (u32 check = 1; check < mod; ++check) {
        for (const u32 e : exp) {
            if (e == 0) return check;
            if (mod_pow(check, e, mod) == 1) break;
        }
    }
    return mod;
}
#line 2 "internal/enable_avx2.cpp"

#ifdef ENABLE_AVX2
#define TARGET_AVX2 __attribute__((target("avx2")))
#else
#define TARGET_AVX2
#endif
#line 5 "utility/countr_zero.cpp"

constexpr int countr_zero(u64 x) {
    if (x == 0) return 64;
#ifdef __GNUC__
    return __builtin_ctzll(x);
#else
    constexpr std::array<int, 64> table = {0,  1,  2,  7,  3,  13, 8,  27, 4,  33, 14, 36, 9,  49, 28, 19,
                                             5,  25, 34, 17, 15, 53, 37, 55, 10, 46, 50, 39, 29, 42, 20, 57,
                                             63, 6,  12, 26, 32, 35, 48, 18, 24, 16, 52, 54, 45, 38, 41, 56,
                                             62, 11, 31, 47, 23, 51, 44, 40, 61, 30, 22, 43, 60, 21, 59, 58};
    return table[(x & (~x + 1)) * 0x218A7A392DD9ABF >> 58 & 0x3F];
#endif
}
#line 3 "utility/revrep.cpp"

class ReversedRange {
    struct Iter {
        int itr;
        constexpr Iter(const int pos) noexcept : itr(pos) {}
        constexpr void operator++() noexcept { --itr; }
        constexpr bool operator!=(const Iter& other) const noexcept { return itr != other.itr; }
        constexpr int operator*() const noexcept { return itr; }
    };
    const Iter first, last;

  public:
    explicit constexpr ReversedRange(const int first, const int last) noexcept
        : first(last - 1), last(std::min(first, last) - 1) {}
    constexpr Iter begin() const noexcept { return first; }
    constexpr Iter end() const noexcept { return last; }
};

constexpr ReversedRange revrep(const int l, const int r) noexcept { return ReversedRange(l, r); }
constexpr ReversedRange revrep(const int n) noexcept { return ReversedRange(0, n); }
#line 9 "internal/modulo_transform.cpp"

namespace proconlib {

template <class M> struct ModuloTransform {
    static constexpr u32 MOD = M::mod();
    static constexpr u32 ROOT = primitive_root(MOD);
    static constexpr int RANK = countr_zero(MOD - 1);

    std::array<M, RANK + 1> root, iroot;
    std::array<M, (RANK >= 2 ? RANK - 2 + 1 : 0)> rate2, irate2;
    std::array<M, (RANK >= 3 ? RANK - 3 + 1 : 0)> rate3, irate3;

    constexpr ModuloTransform() : root(), iroot(), rate2(), irate2(), rate3(), irate3() {
        root[RANK] = M(ROOT).pow((MOD - 1) >> RANK);
        iroot[RANK] = root[RANK].inv();
        for (const int i : revrep(0, RANK)) {
            root[i] = root[i + 1] * root[i + 1];
            iroot[i] = iroot[i + 1] * iroot[i + 1];
        }
        M prod = 1, iprod = 1;
        for (const int i : rep(2, RANK + 1)) {
            rate2[i - 2] = root[i] * prod;
            irate2[i - 2] = iroot[i] * iprod;
            prod *= iroot[i];
            iprod *= root[i];
        }
        prod = 1, iprod = 1;
        for (const int i : rep(3, RANK + 1)) {
            rate3[i - 3] = root[i] * prod;
            irate3[i - 3] = iroot[i] * iprod;
            prod *= iroot[i];
            iprod *= root[i];
        }
    }

    void butterfly(std::vector<M>& a) const {
        const int n = a.size();
        const int h = countr_zero(n);
        for (int len = 0; len < h;) {
            if (len + 1 == h) {
                M rot = 1;
                for (const int s : rep(0, 1 << len)) {
                    const int t = s << 1;
                    const M l = a[t], r = a[t + 1] * rot;
                    a[t] = l + r;
                    a[t + 1] = l - r;
                    if (((s + 1) >> len) == 0) rot *= rate2[countr_zero(~s)];
                }
                len += 1;
            } else {
                const int p = 1 << (h - len - 2);
                M rot = 1;
                for (const int s : rep(0, 1 << len)) {
                    const int t = s << (h - len);
                    const M rot2 = rot * rot;
                    const M rot3 = rot2 * rot;
                    for (const int i : rep(0, p)) {
                        const M a0 = a[i + t];
                        const M a1 = a[i + t + p] * rot;
                        const M a2 = a[i + t + p * 2] * rot2;
                        const M a3 = a[i + t + p * 3] * rot3;
                        const M ax = (a1 - a3) * root[2];
                        a[i + t] = a0 + a1 + a2 + a3;
                        a[i + t + p] = a0 - a1 + a2 - a3;
                        a[i + t + p * 2] = a0 - a2 + ax;
                        a[i + t + p * 3] = a0 - a2 - ax;
                    }
                    if (((s + 1) >> len) == 0) rot *= rate3[countr_zero(~s)];
                }
                len += 2;
            }
        }
    }

    void butterfly_inv(std::vector<M>& a) const {
        const int n = a.size();
        const int h = countr_zero(n);
        for (int len = h; len > 0;) {
            if (len == 1) {
                const int p = n >> 1;
                for (const int i : rep(0, p)) {
                    const M l = a[i], r = a[i + p];
                    a[i] = l + r;
                    a[i + p] = l - r;
                }
                len -= 1;
            } else {
                const int p = 1 << (h - len);
                M rot = 1;
                for (const int s : rep(0, 1 << (len - 2))) {
                    const int t = s << (h - len + 2);
                    const M rot2 = rot * rot;
                    const M rot3 = rot2 * rot;
                    for (const int i : rep(0, p)) {
                        const M a0 = a[i + t];
                        const M a1 = a[i + t + p];
                        const M a2 = a[i + t + p * 2];
                        const M a3 = a[i + t + p * 3];
                        const M ax = (a2 - a3) * iroot[2];
                        a[i + t] = a0 + a1 + a2 + a3;
                        a[i + t + p] = (a0 - a1 + ax) * rot;
                        a[i + t + p * 2] = (a0 + a1 - a2 - a3) * rot2;
                        a[i + t + p * 3] = (a0 - a1 - ax) * rot3;
                    }
                    if (((s + 1) >> (len - 2)) == 0) rot *= irate3[countr_zero(~s)];
                }
                len -= 2;
            }
        }
    }
};

}  // namespace proconlib
#line 2 "math/static_modint.cpp"
#include <ostream>
#line 2 "math/totient.cpp"

template <class T> constexpr T totient(T x) {
    T ret = x;
    for (T i = 2; i * i <= x; ++i) {
        if (x % i == 0) {
            ret /= i;
            ret *= i - 1;
            while (x % i == 0) x /= i;
        }
    }
    if (x > 1) {
        ret /= x;
        ret *= x - 1;
    }
    return ret;
}
#line 7 "math/static_modint.cpp"

template <u32 MOD, std::enable_if_t<((u32)1 <= MOD and MOD <= ((u32)1 << 31))>* = nullptr> class StaticModint {
    using Self = StaticModint;

    static inline constexpr u32 PHI = totient(MOD);
    u32 v;

  public:
    static constexpr u32 mod() noexcept { return MOD; }

    template <class T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
    static constexpr T normalize(const T& x) noexcept {
        return rem_euclid<std::common_type_t<T, i64>>(x, MOD);
    }

    constexpr StaticModint() noexcept : v(0) {}
    template <class T> constexpr StaticModint(const T& x) noexcept : v(normalize(x)) {}
    template <class T> static constexpr Self raw(const T& x) noexcept {
        Self ret;
        ret.v = x;
        return ret;
    }

    constexpr u32 val() const noexcept { return v; }
    constexpr Self neg() const noexcept { return raw(v == 0 ? 0 : MOD - v); }
    constexpr Self inv() const noexcept { return pow(PHI - 1); }
    constexpr Self pow(u64 exp) const noexcept {
        Self ret(1), mult(*this);
        for (; exp > 0; exp >>= 1) {
            if (exp & 1) ret *= mult;
            mult *= mult;
        }
        return ret;
    }

    constexpr Self operator-() const noexcept { return neg(); }
    constexpr Self operator~() const noexcept { return inv(); }

    constexpr Self operator+(const Self& rhs) const noexcept { return Self(*this) += rhs; }
    constexpr Self& operator+=(const Self& rhs) noexcept {
        if ((v += rhs.v) >= MOD) v -= MOD;
        return *this;
    }

    constexpr Self operator-(const Self& rhs) const noexcept { return Self(*this) -= rhs; }
    constexpr Self& operator-=(const Self& rhs) noexcept {
        if (v < rhs.v) v += MOD;
        v -= rhs.v;
        return *this;
    }

    constexpr Self operator*(const Self& rhs) const noexcept { return Self(*this) *= rhs; }
    constexpr Self& operator*=(const Self& rhs) noexcept {
        v = (u64)v * rhs.v % MOD;
        return *this;
    }

    constexpr Self operator/(const Self& rhs) const noexcept { return Self(*this) /= rhs; }
    constexpr Self& operator/=(const Self& rhs) noexcept { return *this *= rhs.inv(); }

    constexpr bool operator==(const Self& rhs) const noexcept { return v == rhs.v; }
    constexpr bool operator!=(const Self& rhs) const noexcept { return v != rhs.v; }
    friend std::ostream& operator<<(std::ostream& stream, const Self& rhs) { return stream << rhs.v; }
};

using Modint1000000007 = StaticModint<1000000007>;
using Modint998244353 = StaticModint<998244353>;
#line 4 "utility/countl_zero.cpp"

TARGET_AVX2 constexpr int countl_zero(u64 x) {
#ifdef __GNUC__
    return x == 0 ? 64 : __builtin_clzll(x);
#else
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
    x |= x >> 32;
    return 64 - countr_zero(~x);
#endif
}
#line 4 "utility/bit_width.cpp"

TARGET_AVX2 constexpr int bit_width(const u64 x) { return 64 - countl_zero(x); }
#line 5 "utility/ceil_log2.cpp"

TARGET_AVX2 constexpr int ceil_log2(const u64 x) {
#ifdef __GNUC__
    return x == 0 ? 0 : bit_width(x - 1);
#else
    int e = 0;
    while (((u64)1 << e) < x) ++e;
    return e;
#endif
}
#line 9 "algorithm/convolution_mod.cpp"

namespace proconlib {

template <class T> std::vector<T> convolution_naive(const std::vector<T>& a, const std::vector<T>& b) {
    const int n = a.size(), m = b.size();
    std::vector<T> c(n + m - 1);
    if (n < m) {
        for (const int j : rep(m))
            for (const int i : rep(n)) c[i + j] += a[i] * b[j];
    } else {
        for (const int i : rep(n))
            for (const int j : rep(m)) c[i + j] += a[i] * b[j];
    }
    return c;
}

template <class M> std::vector<M> convolution_ntt(std::vector<M> a, std::vector<M> b) {
    constexpr ModuloTransform<M> transform;
    const int n = a.size(), m = b.size();
    const int k = 1 << ceil_log2(n + m - 1);
    a.resize(k), b.resize(k);
    transform.butterfly(a);
    transform.butterfly(b);
    for (const int i : rep(k)) a[i] *= b[i];
    transform.butterfly_inv(a);
    a.resize(n + m - 1);
    const M c = M(k).inv();
    for (const int i : rep(n + m - 1)) a[i] *= c;
    return a;
}

}  // namespace proconlib

template <class M> std::vector<M> convolution_mod(std::vector<M>&& a, std::vector<M>&& b) {
    const int n = a.size(), m = b.size();
    if (n == 0 || m == 0) return {};
    if (std::min(n, m) <= 60) return proconlib::convolution_naive(a, b);
    return proconlib::convolution_ntt(std::move(a), std::move(b));
}

template <class M> std::vector<M> convolution_mod(const std::vector<M>& a, const std::vector<M>& b) {
    const int n = a.size(), m = b.size();
    if (n == 0 || m == 0) return {};
    if (std::min(n, m) <= 60) return proconlib::convolution_naive(a, b);
    return proconlib::convolution_ntt(a, b);
}

template <class T, u32 MOD, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
std::vector<T> convolution_mod(const std::vector<T>& a, const std::vector<T>& b) {
    const int n = a.size(), m = b.size();
    if (n == 0 || m == 0) return {};
    using M = StaticModint<MOD>;
    std::vector<M> a2(n), b2(m);
    for (const int i : rep(n)) a2[i] = a[i];
    for (const int i : rep(m)) b2[i] = b[i];
    std::vector<M> c2 = convolution_mod(std::move(a2), std::move(b2));
    std::vector<T> c(n + m - 1);
    for (const int i : rep(n + m - 1)) c[i] = c2[i].val();
    return c;
}
#line 10 "algorithm/convolution_int.cpp"

template <class T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
std::vector<T> convolution_int(const std::vector<T>& a, const std::vector<T>& b) {
    const int n = a.size(), m = b.size();
    if (n == 0 || m == 0) return {};
    static constexpr u64 MOD1 = 754974721, MOD2 = 167772161, MOD3 = 469762049;
    static constexpr u64 M2M3 = MOD2 * MOD3, M1M3 = MOD1 * MOD3, M1M2 = MOD1 * MOD2;
    static constexpr u64 M1M2M3 = MOD1 * MOD2 * MOD3;
    static constexpr u64 I1 = mod_inv(MOD2 * MOD3, MOD1);
    static constexpr u64 I2 = mod_inv(MOD1 * MOD3, MOD2);
    static constexpr u64 I3 = mod_inv(MOD1 * MOD2, MOD3);
    static constexpr u64 OFFSET[5] = {0, 0, M1M2M3, 2 * M1M2M3, 3 * M1M2M3};
    std::vector<T> c1 = convolution_mod<T, MOD1>(a, b);
    std::vector<T> c2 = convolution_mod<T, MOD2>(a, b);
    std::vector<T> c3 = convolution_mod<T, MOD3>(a, b);
    std::vector<T> c(n + m - 1);
    for (const int i : rep(n + m - 1)) {
        u64 x = 0;
        x += (c1[i] * I1) % MOD1 * M2M3;
        x += (c2[i] * I2) % MOD2 * M1M3;
        x += (c3[i] * I3) % MOD3 * M1M2;
        i64 diff = c1[i] - rem_euclid<i64>(x, MOD1);
        if (diff < 0) diff += MOD1;
        c[i] = x - OFFSET[diff % 5];
    }
    return c;
}