(Analysis by Benjamin Qi )

Let $N$ denote the number of distinct letters that appear in the input string (we can disregard all other letters). Note how the scoring gives bounds on $N$ (rather unoriginally).

From the analysis for the Bronze version of this problem, if we are given the order of the cowphabet, then the minimum number of times the alphabet is hummed is equal to one plus the number of consecutive pairs of letters such that the first letter in the pair comes at the same position or after the second letter in the pair. Specifically, if the order of the cowphabet is $c_1,c_2,\ldots, c_N$, then the minimum number of times the cowphabet was hummed is

$$\text{evaluate}([c_1,c_2,\ldots,c_N])=1+\sum_{i=1}^N\sum_{j=1}^i(\text{# occurrences of }c_ic_j).$$

Subtask: $N\le 8$

First, process the input string such that for any two letters $c_1$ and $c_2$, we know the number of occurrences of the substring $c_1c_2$. Now we can iterate over all $N!$ possible orders of the cowphabet and compute the quantity above in $\mathcal{O}(N^2)$ time.

Time Complexity: $\mathcal{O}(N!\cdot N^2+\text{length}(\text{input string}))$

Full Solution: $N\le 20$

Do dynamic programming on subsets. The DP state for a subset $S=\{c_1,c_2,\ldots,c_n\}$ of the cowphabet is $\min_p\left(\text{evaluate}(p)\right)$ over all $n!$ permutations $p$ of $S$. The DP transition for all nonempty $S$ is as follows:

$$\texttt{dp}[S]=\min_{c_j\in S}\left(\texttt{dp}[S\setminus \{c_j\}]+\sum_{c_k\in S}(\text{# occurrences of }c_jc_k)\right)$$

Time Complexity: $\mathcal{O}(2^N\cdot N^2+\text{length}(\text{input string}))$

Danny Mittal's code:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
 
public class UdderedButNotHerdGold {
    public static void main(String[] args) throws IOException {
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        String heard = in.readLine();
        Map<Character, Integer> index = new HashMap<>();
        for (char letter : heard.toCharArray()) {
            if (!index.containsKey(letter)) {
                index.put(letter, index.size());
            }
        }
        int n = index.size();
        int[][] adjacent = new int[n][n];
        for (int j = 1; j < heard.length(); j++) {
            adjacent[index.get(heard.charAt(j - 1))][index.get(heard.charAt(j))]++;
        }
        int[] dp = new int[1 << n];
        dp[0] = 1;
        for (int mask = 1; mask < (1 << n); mask++) {
            dp[mask] = heard.length();
            for (int j = 0; j < n; j++) {
                if ((mask & (1 << j)) != 0) {
                    int sum = dp[mask ^ (1 << j)];
                    for (int k = 0; k < n; ++k) if ((mask & (1 << k)) != 0) sum += adjacent[j][k];
                    dp[mask] = Math.min(dp[mask], sum);
                }
            }
        }
        System.out.println(dp[(1 << n) - 1]);
    }
}

It was possible (but not necessary) to remove a factor of $N$ from the runtime.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
 
public class UdderedButNotHerdGold {
    public static void main(String[] args) throws IOException {
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        String heard = in.readLine();
        Map<Character, Integer> index = new HashMap<>();
        for (char letter : heard.toCharArray()) {
            if (!index.containsKey(letter)) {
                index.put(letter, index.size());
            }
        }
        int n = index.size();
        int[][] adjacent = new int[n][n];
        for (int j = 1; j < heard.length(); j++) {
            adjacent[index.get(heard.charAt(j - 1))][index.get(heard.charAt(j))]++;
        }
        int[][] sums = new int[n][1 << n];
        int[] dp = new int[1 << n];
        dp[0] = 1;
        for (int mask = 1; mask < (1 << n); mask++) {
            dp[mask] = heard.length();
            int k = 0;
            while ((mask & (1 << k)) == 0) {
                k++;
            }
            for (int j = 0; j < n; j++) {
                sums[j][mask] = sums[j][mask - (1 << k)] + adjacent[j][k];
                if ((mask & (1 << j)) != 0) {
                    dp[mask] = Math.min(dp[mask], dp[mask - (1 << j)] + sums[j][mask]);
                }
            }
        }
        System.out.println(dp[(1 << n) - 1]);
    }
}

However, this solution uses $\Theta(N\cdot 2^N)$ memory. We can reduce the required memory to $\mathcal{O}(2^N)$ and the runtime by a constant factor by using two 2D arrays to store the sums rather than one:

#include <bits/stdc++.h>

#define f first
#define s second

using namespace std;

int stor1[1<<10][20], stor2[1<<10][20];
 
int main() {
	string s; cin >> s;
	map<char,int> m; for(char c: s) m[c] = 0;
	int cnt = 0; for (auto& t: m) t.s = cnt++;
	int N = m.size(); assert(N <= 20);
	vector<vector<int>> oc(N,vector<int>(N));
	for (int i = 0; i+1 < s.size(); ++i) 
		++oc[m[s[i]]][m[s[i+1]]];
	vector<int> dp(1<<N,1e9);
	dp[0] = 1;
	int bits = N/2;
	for (int j = 0; j < N; ++j) {
		for (int i = 0; i < 1<<bits; ++i) 
			for (int k = 0; k < bits; ++k) if (i&1<<k) 
				stor1[i][j] += oc[k][j];
		for (int i = 0; i < 1<<(N-bits); ++i) 
			for (int k = 0; k < N-bits; ++k) if (i&1<<k)
				stor2[i][j] += oc[bits+k][j];
	}
	for (int i = 0; i < 1<<N; ++i)
		for (int j = 0; j < N; ++j) if (i&1<<j) {
			int sum = dp[i^1<<j];
			sum += stor1[i&((1<<bits)-1)][j];
			sum += stor2[i>>bits][j];
			dp[i] = min(dp[i],sum);
		}
	cout << dp[(1<<N)-1] << "\n";
}