(Analysis by William Lin)

First, let's observe some properties about the optimal solution.

We can use DP for this problem: $dp[i][s]$ is the min cost to cover all packages $\le i$ where $s$ is one of five states:

To transition from $dp[i]$ to $dp[j]$ (assuming no cows or packages in $(i, j)$), we add a cost of $j-i$ for states 1 and 3 and $2(j-i)$ for states 2 and 4. Depending on whether $j$ has a cow or a package, we can then transition between $dp[j]$.

Unfortunately, we can have up to $10^{18}$ cows and packages so doing this DP naively is not feasible. However, note that we can describe the transitions with min-plus matrix multiplication: $dp[j]=dp[i] A_{i,j}$. If we have many repeated chunks of size $M$ between $i$ and $j$, we can use binary exponentiation to efficiently find the transition matrix from the beginning since $A_{i, j}=(A_{i, i+M})^{(j-i)/M}$.

We will consider the number line in chunks of $M$. Adjacent chunks are equivalent except when an endpoint of a range of cows or packages lies in one of the chunks, which happens at most $O(N+P)$ times. We can sweep through the endpoints of these ranges while maintaining a segment tree that stores all active cows and packages in the current chunk and allows range queries for transition matrices. Between endpoints, we will need to perform a constant number of segment tree queries and also binary exponentiation, which will be $O(\log(N+P)+\log C)$, where $C$ is the maximum number of chunks, bounded by $\frac{10^{18}}{M}$ in this problem. The final time complexity is $O((N+P)(\log(N+P)+\log C))$.

#include <bits/stdc++.h>
using namespace std;
 
#define ll long long
#define DP array<array<ll, 5>, 5>
 
DP dptrans(const DP a, const DP b) {
    DP c;
    memset(&c, 0x3f, sizeof(c));
    for(int i=0; i<5; ++i)
        for(int j=0; j<5; ++j)
            for(int k=0; k<5; ++k)
                c[i][j]=min(c[i][j], a[i][k]+b[k][j]);
    return c;
}
 
const int mxNP=4e4;
ll m, l[mxNP], r[mxNP];
int n, p;
array<ll, 2> ps[2*mxNP+1];
array<ll, 3> es[2*mxNP+1];
DP st[1<<18];
 
void upd(int l1, int x, int i=1, int l2=0, int r2=2*(n+p)-1) {
    if(l2==r2) {
        memset(&st[i], 0x3f, sizeof(st[i]));
        if(!x)
            st[i][0][4]=st[i][0][0]=st[i][1][4]=st[i][1][1]=st[i][4][2]=st[i][2][2]=st[i][4][3]=st[i][3][3]=0;
        else if(x&1)
            st[i][4][4]=st[i][2][1]=st[i][3][0]=st[i][2][4]=st[i][4][0]=0;
        else
            st[i][0][0]=st[i][1][1]=st[i][2][2]=st[i][3][3]=st[i][4][4]=0;
        for(int j=0; j<5; ++j)
            for(int k=0; k<4; ++k)
                st[i][j][k]+=(ps[l1+1][0]-ps[l1][0])*(1+k%2);
        return;
    }
    int m2=(l2+r2)/2;
    if(l1<=m2)
        upd(l1, x, 2*i, l2, m2);
    else
        upd(l1, x, 2*i+1, m2+1, r2);
    st[i]=dptrans(st[2*i], st[2*i+1]);
}
 
DP qry(int l1, int r1, int i=1, int l2=0, int r2=2*(n+p)-1) {
    if(l1<=l2&&r2<=r1)
        return st[i];
    int m2=(l2+r2)/2;
    if(r1<=m2)
        return qry(l1, r1, 2*i, l2, m2);
    if(m2<l1)
        return qry(l1, r1, 2*i+1, m2+1, r2);
    return dptrans(qry(l1, r1, 2*i, l2, m2), qry(l1, r1, 2*i+1, m2+1, r2));
}
 
int main() {
    ios::sync_with_stdio(0);
    cin.tie(0);
 
    cin >> m >> n >> p;
    for(int i=0; i<n+p; ++i) {
        cin >> l[i] >> r[i];
        ps[i]={l[i]%m, i};
    }
    sort(ps, ps+n+p);
    for(int i=0; i<n+p; ++i) {
    	ps[n+p+i]={ps[i][0]+m};
        es[2*i]={l[ps[i][1]]/m, i, ps[i][1]<n};
        es[2*i+1]={r[ps[i][1]]/m+1, i, 2};
    }
    es[2*(n+p)]={-1, 0};
    sort(es, es+2*(n+p)+1);
    for(int i=0; i<2*(n+p); ++i)
        upd(i, 2);
    DP ans;
    memset(&ans, 0x3f, sizeof(ans));
    ans[4][4]=0;
    for(int i=1; i<=2*(n+p); ++i) {
        ans=dptrans(ans, qry(es[i-1][1], es[i][1]-1+(es[i-1][0]<es[i][0]?n+p:0)));
        ll rl=es[i][0]-es[i-1][0]-1;
        if(rl>0) {
            DP b=qry(es[i][1], es[i][1]-1+n+p);
            while(rl) {
                if(rl&1)
                    ans=dptrans(ans, b);
                b=dptrans(b, b);
                rl/=2;
            }
        }
        upd(es[i][1], es[i][2]);
        upd(es[i][1]+n+p, es[i][2]);
    }
    cout << ans[4][4] << "\n";
}