(Analysis by Benjamin Qi)

Note: I index the cows as $0\ldots N-1$ rather than $1\ldots N$.

To solve this problem in $\mathcal{O}(N^2)$ time, fix $r$ and iterate over all possible $l$ in decreasing order. Let $\texttt{unique}_{l,r}$ equal the number of cows in the interval $l\ldots r$ whose breeds are unique within that interval. When we decrease $l$ by one,

#include<bits/stdc++.h>
using namespace std;
 
int main() {
	int N; cin >> N;
	vector<int> B(N); for (int& b: B) cin >> b;
	int64_t ans = 0;
	for (int r = 0; r < N; ++r) {
		vector<int> occ(N+1);
		int unique = 0;
		for (int l = r-1; l >= 0; --l) {
			if (B[l] == B[r]) break;
			int& o = occ[B[l]]; ++o;
			if (o == 1) {
				ans += unique;
				++unique;
			} else if (o == 2) {
				--unique;
			}
		}
	}
	cout << ans << "\n";
}

Essentially, we're computing arrays $\texttt{active}_r[l]$, $\texttt{unique}_r[l]$, $\texttt{val}_r[l]$ for each $r$ from $0\ldots N-1$. For each $l\le r$, define

For every $r$, we add a suffix of $\texttt{val}_r$ to the answer.

To solve this problem in $\mathcal{O}(N\log N)$ time, we must be able to efficiently transition from $r-1$ to $r$. Define $\texttt{prev_oc}[j]$ to equal the maximum index $i<j$ such that $B_i=B_j$. $\texttt{active}_r$ and $\texttt{unique}_r$ are the same as $\texttt{active}_{r-1}$ and $\texttt{unique}_{r-1}$ respectively except for the following changes:

These updates and range sum queries over $\texttt{val}_r$ can be supported in $\mathcal{O}(\log N)$ time each using a segment tree with lazy propagation. At each segment $x\ldots y$ of the tree, maintain $\sum_{i=x}^y\texttt{active}_r[i]$, $\sum_{i=x}^y\texttt{val}_r[i]$, and a lazy value that the values of all active indices in the segment should be increased by. You can check the analysis for Counting Haybales for an introduction to lazy segment trees.

#include<bits/stdc++.h>
using namespace std;

using T = uint64_t;
const int SZ = 1<<18;

struct LazySeg {
	T sum[2*SZ], lazy[2*SZ], num_active[2*SZ];
	LazySeg() {
		for (int i = 0; i < SZ; ++i) 
			num_active[SZ+i] = 1;
		for (int i = SZ-1; i > 0; --i) 
			num_active[i] = num_active[2*i]+num_active[2*i+1];
	}
	void push(int ind, int L, int R) {
		sum[ind] += num_active[ind]*lazy[ind];
		if (L != R) for (int i = 0; i < 2; ++i) 
			lazy[2*ind+i] += lazy[ind];
		lazy[ind] = 0;
	}
	void pull(int ind) {
		sum[ind] = sum[2*ind]+sum[2*ind+1];
		num_active[ind] = num_active[2*ind]+num_active[2*ind+1];
	}
	void increment(int lo,int hi, int val, int ind=1,int L=0, int R=SZ-1) {
		push(ind,L,R); if (hi < L || R < lo) return;
		if (lo <= L && R <= hi) { 
			lazy[ind] = val; push(ind,L,R); return; }
		int M = (L+R)/2; 
		increment(lo,hi,val,2*ind,L,M); 
		increment(lo,hi,val,2*ind+1,M+1,R); 
		pull(ind);
	}
	T query(int lo, int hi, int ind=1, int L=0, int R=SZ-1) {
		push(ind,L,R); 
		if (lo > R || L > hi) return 0;
		if (lo <= L && R <= hi) return sum[ind];
		int M = (L+R)/2;
		return query(lo,hi,2*ind,L,M)+query(lo,hi,2*ind+1,M+1,R);
	}
	void deactivate(int pos, int ind=1, int L=0, int R=SZ-1) {
		push(ind,L,R); 
		if (pos > R || L > pos) return;
		if (pos <= L && R <= pos) {
			assert(num_active[ind] == 1);
			sum[ind] = num_active[ind] = 0;
			return;
		}
		int M = (L+R)/2; 
		deactivate(pos,2*ind,L,M);
		deactivate(pos,2*ind+1,M+1,R);
		pull(ind);
	}
} Seg;

int main() {
	cin.tie(0)->sync_with_stdio(0);
	int N; cin >> N;
	vector<int> B(N); for (int& b: B) cin >> b;
	vector<int> last(N+1,-1), prev_oc(N);
	int64_t ans = 0;
	for (int r = 0; r < N; ++r) {
		int& last_oc = last[B[r]];
		ans += Seg.query(last_oc+1,SZ-1);
		if (last_oc != -1) {
			Seg.deactivate(last_oc);
			Seg.increment(prev_oc[last_oc],last_oc-1,-1);
		}
		Seg.increment(last_oc,r-1,1);
		prev_oc[r] = last_oc; last_oc = r;
	}
	cout << ans << "\n";
}

Danny Mittal's code:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.StringTokenizer;
 
public class TriplesOfCows {
 
    public static void main(String[] args) throws IOException {
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        int n = Integer.parseInt(in.readLine());
        StringTokenizer tokenizer = new StringTokenizer(in.readLine());
        int[] last2 = new int[n + 1];
        int[] last = new int[n + 1];
        long answer = 0;
        SegmentTree segTree = new SegmentTree(n);
        for (int j = 1; j <= n; j++) {
            int k = Integer.parseInt(tokenizer.nextToken());
            segTree.updateSingle(last[k], -1);
            segTree.updateRange(last2[k] + 1, last[k] - 1, -1);
            answer += segTree.query(last[k] + 1, j - 1);
            segTree.updateRange(last[k] + 1, j - 1, 1);
            segTree.updateSingle(j, 1);
            last2[k] = last[k];
            last[k] = j;
        }
        System.out.println(answer);
    }
 
    static class SegmentTree {
        final int n;
        final long[] value = new long[530000];
        final long[] singles = new long[530000];
        final long[] lazy = new long[530000];
 
        SegmentTree(int n) {
            this.n = n;
        }
 
        void propagate(int node) {
            value[2 * node] += lazy[node] * singles[2 * node];
            value[(2 * node) + 1] += lazy[node] * singles[(2 * node) + 1];
            lazy[2 * node] += lazy[node];
            lazy[(2 * node) + 1] += lazy[node];
            lazy[node] = 0;
        }
 
        void updateSingle(int index, long delta, int node, int segFrom, int segTo) {
            if (segFrom == segTo) {
                value[node] += delta * lazy[node];
                singles[node] += delta;
            } else {
                propagate(node);
                int mid = (segFrom + segTo) / 2;
                if (index <= mid) {
                    updateSingle(index, delta, 2 * node, segFrom, mid);
                } else {
                    updateSingle(index, delta, (2 * node) + 1, mid + 1, segTo);
                }
                value[node] = value[2 * node] + value[(2 * node) + 1];
                singles[node] = singles[2 * node] + singles[(2 * node) + 1];
            }
        }
 
        void updateSingle(int index, long delta) {
            if (index > 0) {
                updateSingle(index, delta, 1, 1, n);
            }
        }
 
        void updateRange(int from, int to, long delta, int node, int segFrom, int segTo) {
            if (segTo < from || to < segFrom) {
 
            } else if (from <= segFrom && segTo <= to) {
                value[node] += delta * singles[node];
                lazy[node] += delta;
            } else {
                propagate(node);
                int mid = (segFrom + segTo) / 2;
                updateRange(from, to, delta, 2 * node, segFrom, mid);
                updateRange(from, to, delta, (2 * node) + 1, mid + 1, segTo);
                value[node] = value[2 * node] + value[(2 * node) + 1];
                singles[node] = singles[2 * node] + singles[(2 * node) + 1];
            }
        }
 
        void updateRange(int from, int to, long delta) {
            updateRange(from, to, delta, 1, 1, n);
        }
 
        long query(int from, int to, int node, int segFrom, int segTo) {
            if (segTo < from || to < segFrom) {
                return 0;
            } else if (from <= segFrom && segTo <= to) {
                return value[node];
            } else {
                propagate(node);
                int mid = (segFrom + segTo) / 2;
                return query(from, to, 2 * node, segFrom, mid) + query(from, to, (2 * node) + 1, mid + 1, segTo);
            }
        }
 
        long query(int from, int to) {
            return query(from, to, 1, 1, n);
        }
    }
}