(Analysis by Benjamin Qi)

For each $0\le j<N$ we need to count the number of pairs $(x,y)$ such that $x<y$, $A[x]>A[y]$ and $A[y]<j$. It suffices to compute the number of $x<y$ such that $A[x]>A[y]$ for every $y$; call this quantity $n[y]$. Then $ans[j]=\sum_{A[y]<j}n[y]$ can be computed with prefix sums.

The value of $n[y]$ for each $y$ can be found via the following process:

  1. Set $h=N.$
  2. Maintain a collection of indices, initially empty.
  3. For each $y$ with $A[y]=h$, set $n[y]$ equal to the number of indices in the collection that are less than $y$.
  4. For each $y$ with $A[y]=h$, insert $y$ into the collection.
  5. If $h=0,$ terminate. Otherwise, decrease $h$ by one and repeat from step 3 (the collection is kept, not reset).

The collection can be a policy-based data structure in C++ or a binary indexed tree.

My code:

#include "bits/stdc++.h"
 
using namespace std;
 
// Prepares I/O for a file-based task named `s`:
//   - turns off C/C++ stream synchronisation and unties cin from cout,
//   - redirects stdin to "<s>.in" and stdout to "<s>.out".
// The results of freopen are not checked.
void setIO(string s) {
	ios_base::sync_with_stdio(false);
	cin.tie(nullptr);

	const string inputPath = s + ".in";
	const string outputPath = s + ".out";
	freopen(inputPath.c_str(), "r", stdin);
	freopen(outputPath.c_str(), "w", stdout);
}

#include <ext/pb_ds/tree_policy.hpp>
#include <ext/pb_ds/assoc_container.hpp>
using namespace __gnu_pbds;
// Ordered set of unique keys backed by a pb_ds red-black tree with the
// order-statistics node update, which provides order_of_key(k) (number of
// stored keys strictly less than k) and find_by_order(i).
template <class Key>
using Tree = __gnu_pbds::tree<
	Key,
	__gnu_pbds::null_type,
	std::less<Key>,
	__gnu_pbds::rb_tree_tag,
	__gnu_pbds::tree_order_statistics_node_update>;
 
// Capacity of the global arrays below. main() indexes numInv up to N+1 and
// todo up to the largest input value, so this presumably assumes N <= 1e5
// with values in [0, N] (TODO confirm against the problem statement).
const int MX = 1e5+5;

// NOTE(review): main() declares its own local N, which shadows this global;
// the global is never read in the visible code.
int N;
// numInv[v+1] first accumulates the inversions whose smaller (later) element
// has value v; after the prefix-sum pass in main(), numInv[j] is the answer
// printed for j.
long long numInv[MX];
// todo[v] holds the indices i with A[i] == v, in increasing order of i.
vector<int> todo[MX];
 
int main() {
	setIO("haircut");
	int N; cin >> N;
	vector<int> A(N); for (int& t: A) cin >> t;
	for (int i = 0; i < N; ++i) todo[A[i]].push_back(i);
	Tree<int> T;
	for (int i = N; i >= 0; --i) {
		for (int t: todo[i]) numInv[i+1] += T.order_of_key(t);
		for (int t: todo[i]) T.insert(t);
	}
	for (int i = 1; i < N; ++i) numInv[i] += numInv[i-1];
	for (int i = 0; i < N; ++i) cout << numInv[i] << "\n";
}

Dhruv Rohatgi's code:

#include <iostream>
#include <algorithm>
using namespace std;
// Capacity bound for the Fenwick tree; set() stops at indices >= MAXN.
#define MAXN 100005
 
// Number of input values, read in main().
int N;
// NOTE(review): A is not referenced anywhere in the visible code.
int A[100000];
// Fenwick (binary indexed) tree storage, addressed 1-based by getSum() and set().
int T[MAXN+1];
 
// Fenwick-tree prefix query: returns the sum of everything added via set()
// at 0-based positions 0..i (internally the tree is 1-based, hence the +1).
int getSum(int i)
{
	int total = 0;
	int node = i + 1;
	while (node > 0)
	{
		total += T[node];
		node -= node & -node;	// drop the lowest set bit to move to the next covering node
	}
	return total;
}
// Fenwick-tree point update: adds `dif` at 0-based position i (stored 1-based),
// touching every tree node below MAXN whose range covers that position.
void set(int i,int dif)
{
	int node = i + 1;
	while (node < MAXN)
	{
		T[node] += dif;
		node += node & -node;	// step to the next node that covers this position
	}
}
 
long long cnt[100000];
 
int main()
{
	freopen("haircut.in","r",stdin);
	freopen("haircut.out","w",stdout);
	cin >> N;
	int a;
	for(int i=0;i<N;i++)
	{
		cin >> a;
		a++;
		cnt[a] += i - getSum(a);
		set(a, 1);
	}
	long long ans = 0;
	for(int j=1;j<=N;j++)
	{
		cout << ans << '\n';
		ans += cnt[j];
	}
}