(Analysis by Ben Chen)

Subtask 1:

In this subtask no edges initially exist in the graph, so we will be adding all edges, which will then form a tree. In the dfs tree, the set of nodes in any subtree must form a contiguous range, so we can use range dp. Define $dp(i, j)$ to be the minimum cost on the range $[i, j]$, where $i$ is the root of a subtree connected to some series of subtrees under it containing the nodes $[i+1, j]$. Then

$$dp(i, j) = \text{min}_{i\leq k<j} dp(i, k)+dp(k+1, j)+a_{i,k+1}$$
This represents adding the subtree $[k+1, j]$ onto the existing subtree rooted at $i$ connected by the edge $i\to k+1$. The overall time complexity is $O(N^3)$.

My code:

#include <bits/stdc++.h>
using namespace std;
 
const int mxN=750, INF=1e9;
int n, a[mxN][mxN], dp[mxN][mxN];
 
int main() {
	ios::sync_with_stdio(0);
	cin.tie(0);
	cin >> n;
	for (int i = 1; i < n; ++i)
		for (int j = 0; j < i; ++j)
			cin >> a[j][i], a[i][j] = a[j][i];
	for (int i = n - 1; i >= 0; --i)
		for (int j = i + 1; j < n; ++j) {
			dp[i][j] = INF;
			for (int k = i; k < j; ++k)
				dp[i][j] = min(dp[i][j], dp[i][k] + dp[k + 1][j] + a[i][k + 1]);
		}
	cout << dp[0][n - 1];
	return 0;
}

Subtask 2:

This is similar to the previous subtask, except now we have to consider removal of edges. The new range $[k+1, j]$ should have no edges to any node in $[i+1, k]$ (otherwise the cross-linking between subtrees will violate the dfs ordering). Define

$$cut(i, k, j) = \sum_{x=i}^k\sum_{y=k+1}^j \max(0, a_{x,y})$$
or the cost to remove all edges between $[i, k]$ and $[k+1, j]$. Our updated dp is then
$$dp(i, j) = \text{min}_{i\leq k<j} dp(i, k)+dp(k+1, j)+\max(0, a_{i,k+1})+cut(i+1, k, j)$$
The overall time complexity is $O(N^5)$.

#include <bits/stdc++.h>
using namespace std;
 
const int mxN=750, INF=1e9;
int n, a[mxN][mxN], dp[mxN][mxN];
 
// note this is defined slightly differently
int cut(int l, int m, int r) {
	int res = 0;
	for (int i = l; i < m; ++i)
		for (int j = m; j <= r; ++j)
			res += max(-a[i][j], 0);
	return res;
}
 
int main() {
	ios::sync_with_stdio(0);
	cin.tie(0);
	cin >> n;
	for (int i = 1; i < n; ++i)
		for (int j = 0; j < i; ++j)
			cin >> a[j][i], a[i][j] = a[j][i];
	for (int i = n - 1; i >= 0; --i)
		for (int j = i + 1; j < n; ++j) {
			dp[i][j] = INF;
			for (int k = i; k < j; ++k)
				dp[i][j] = min(dp[i][j], dp[i][k] + dp[k + 1][j] + max(0, a[i][k + 1]) + cut(i + 1, k + 1, j));
		}
	cout << dp[0][n - 1];
	return 0;
}

Full Solution:

The full solution follows from computing $cut$ faster. This can be done with precomputation and prefix sums. Define 2d prefix sum
$$p(i, j) = \sum_{x=i}^j\sum_{y=x+1}^j \max(0, a_{x,y})$$
Then we can rewrite
$$cut(i, k, j) = p(i, j) - p(i, k) - p(k + 1, j)$$
This runs in $O(N^3)$.

#include <bits/stdc++.h>
using namespace std;
 
const int mxN=750, INF=1e9;
int n, a[mxN][mxN], dp[mxN][mxN], p[mxN][mxN];
 
int cut(int l, int m, int r) {
	if (l >= m || m > r)
		return 0;
	return p[l][r] - p[l][m - 1] - p[m][r];
}
 
int main() {
	ios::sync_with_stdio(0);
	cin.tie(0);
	cin >> n;
	for (int i = 1; i < n; ++i)
		for (int j = 0; j < i; ++j)
			cin >> a[j][i], a[i][j] = a[j][i];
	for (int i = n - 1; i >= 0; --i)
		for (int j = i + 1; j < n; ++j) {
			p[i][j] = p[i + 1][j] + p[i][j - 1] - p[i + 1][j - 1] + max(-a[i][j], 0);
			dp[i][j] = INF;
			for (int k = i; k < j; ++k)
				dp[i][j] = min(dp[i][j], dp[i][k] + dp[k + 1][j] + max(0, a[i][k + 1]) + cut(i + 1, k + 1, j));
		}
	cout << dp[0][n - 1];
	return 0;
}