(Analysis by Benjamin Qi)

Subtask 1: $N\le 50$

If no pair of equal elements exists, then the answer is $0$. Otherwise, if two equal elements exist, then we must operate on at least one of them; choose any of them to operate on and increase the answer by one.

Naively checking for two equal elements takes $O(N^2)$ time, and the answer turns out to be $O(N^2)$, so this solution runs in $O(N^4)$ time overall.

#include <bits/stdc++.h>
using namespace std;

int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int T;
    cin >> T;
    for (int tc = 0; tc < T; ++tc) {
        int N, K;
        cin >> N >> K;
        vector<int> A(N);
        for (auto &i : A) cin >> i;

        int64_t ans = 0;
        while (true) {
            for (int i = 0; i < N; ++i)
                for (int j = i + 1; j < N; ++j)
                    if (A[i] == A[j]) {
                        A[j] += K;
                        goto done;
                    }
            break;
        done:
            ++ans;
        }
        cout << ans << "\n";
    }
}

Subtask 2: $N\le 2000$

To simplify the implementation for this subtask as well as the full solution, we first note that if $K$ is negative, we can make it positive without changing the answer by negating $K$ and applying the transformation $a_i\to N+1-a_i$. For the remainder of the analysis, we assume $K>0$.

Consider all elements equal to the minimum element. All but one of these elements need to be increased. We can select one of these elements to leave as is, remove it from the array, and increase the rest.

Each iteration takes $O(N)$ time, and the array will be empty after at most $N$ iterations, so this runs in $O(N^2)$ time overall.

#include <bits/stdc++.h>
using namespace std;

int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int T;
    cin >> T;
    for (int tc = 0; tc < T; ++tc) {
        int N, K;
        cin >> N >> K;
        vector<int64_t> A(N);
        for (auto &i : A) cin >> i;

        if (K < 0) {
            for (auto &i : A) i *= -1;
            K *= -1;
        }

        int64_t ans = 0;
        while (!A.empty()) {
            int64_t m = *min_element(begin(A), end(A));
            int is_min = 0;
            vector<int64_t> nA;
            for (auto v : A) {
                if (v == m) ++is_min;
                else nA.push_back(v);
            }
            ans += is_min - 1;
            for (int i = 0; i < is_min - 1; ++i) nA.push_back(m + K);
            swap(A, nA);
        }

        cout << ans << "\n";
    }
}

Subtask 3: $K=1$

Let's store the number of occurrences of each value in the array $A$ rather than $A$ itself. Now we can follow similar logic as the previous subtask. Start the minimum value at $1$ and increase it until we've processed all array elements. At each step, we process all elements equal to the current minimum value in $O(1)$ time, for an overall time complexity of $O(N)$. Make sure to store the answer in a 64-bit integer, or else you will get wrong answer.

#include <bits/stdc++.h>
using namespace std;

int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int T;
    cin >> T;
    for (int tc = 0; tc < T; ++tc) {
        int N, K;
        cin >> N >> K;
        vector<int> cnt(N);
        for (int i = 0; i < N; ++i) {
            int x;
            cin >> x;
            ++cnt.at(x - 1);
        }
        assert(K == 1);

        int64_t ans = 0;
        int64_t cur_min = 0;
        int cnt_min = 0;
        while (cur_min < N || cnt_min > 0) {
            if (cur_min < N) cnt_min += cnt.at(cur_min);
            if (cnt_min > 0) ans += cnt_min - 1;
            ++cur_min;
            if (cnt_min > 0) --cnt_min;
        }

        cout << ans << "\n";
    }
}

Full Solution:

We can apply the same solution from subtask 3, but on each remainder modulo $K$ independently. The time complexity remains the same.

#include <bits/stdc++.h>
using namespace std;

int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int T;
    cin >> T;
    for (int tc = 0; tc < T; ++tc) {
        int N, K;
        cin >> N >> K;
        vector<int> cnt(N);
        for (int i = 0; i < N; ++i) {
            int x;
            cin >> x;
            ++cnt.at(x - 1);
        }
        if (K < 0) {
            reverse(begin(cnt), end(cnt));
            K *= -1;
        }
        assert(0 < K && K <= N);
        int64_t ans = 0;

        for (int i = 0; i < K; ++i) {  // process remainder i mod K
            int64_t cur_min = i;
            int cnt_min = 0;
            while (cur_min < N || cnt_min > 0) {
                if (cur_min < N) cnt_min += cnt.at(cur_min);
                if (cnt_min > 0) ans += cnt_min - 1;
                cur_min += K;
                if (cnt_min > 0) --cnt_min;
            }
        }

        cout << ans << "\n";
    }
}