1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#pragma once


#include <vector><--- Include file:  not found. Please note: Cppcheck does not need standard library headers to get proper results.
#include <functional><--- Include file:  not found. Please note: Cppcheck does not need standard library headers to get proper results.


#include "snippet/internal/types.hpp"
#include "snippet/iterations.hpp"

#include "internal/dev_env.hpp"

#include "numeric/arithmetic.hpp"


namespace uni {


static inline i64 float_div(const i64 n, const i64 p) {
    return static_cast<i64>(static_cast<double>(n) / static_cast<double>(p));
};

// Thanks to: https://nyaannyaan.github.io/library/multiplicative-function/prime-counting-faster.hpp
__attribute__((target("avx2"), optimize("O3", "unroll-loops")))
i64 count_primes(const u64 n) noexcept(NO_EXCEPT) {
    if(n == 0 || n == 1) return 0;

    const i64 sqrt_n = uni::sqrt_floor(n);
    const i64 m = float_div(n, sqrt_n);

    std::vector<i64> hl(m);
    REP(i, 1, m) hl[i] = float_div(n, i) - 1;

    std::vector<i32> hs(sqrt_n + 1);
    std::iota(ALL(hs), -1);

    for(i32 x=2, pi=0; x <= sqrt_n; ++x) {
        if(hs[x] == hs[x - 1]) continue;
        const i64 x2 = static_cast<i64>(x) * x;
        const i64 imax = std::min(m, float_div(n, x2) + 1);

        i64 ix = x;
        REP(i, 1, imax) {
            hl[i] -= (ix < m ? hl[ix] : hs[float_div(n, ix)]) - pi;
            ix += x;
        }
        FORD(k, x2, sqrt_n) hs[k] -= hs[float_div(k, x)] - pi;

        ++pi;
    }

    return hl[1];
}


inline i64 count_primes(const i64 l, const i64 r) noexcept(NO_EXCEPT) {
    assert(l <= r);
    return count_primes(r) - count_primes(l - 1);
}


} // namespace uni