/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.scoref;

import edu.stanford.nlp.hcoref.data.CorefCluster;
import edu.stanford.nlp.hcoref.data.Document;
import edu.stanford.nlp.hcoref.data.Mention;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.scoref.DocumentProcessor;
import edu.stanford.nlp.scoref.StatisticalCorefTrainer;
import edu.stanford.nlp.scoref.StatisticalCorefUtils;
import edu.stanford.nlp.util.Pair;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.stream.Collectors;

/**
 * {@link DocumentProcessor} that collects labeled mention pairs for every processed
 * document and writes the accumulated map to {@code StatisticalCorefTrainer.datasetFile}
 * when {@link #finish()} is called.
 *
 * <p>For each document the candidate pairs come from
 * {@code StatisticalCorefUtils.getUnlabeledMentionPairs}; pairs whose two mentions share a
 * gold cluster are marked {@code true}. Negative pairs may then be down-sampled to reach a
 * minimum positive ratio, and whole mentions may be dropped to respect a per-document cap.
 * All sampling draws from one {@link Random} seeded with 0, so repeated runs over the same
 * documents in the same order select the same examples.
 */
public class DatasetBuilder
implements DocumentProcessor {
    /** Upper bound on the number of labeled pairs kept for one document. */
    private final int maxExamplesPerDocument;
    /** Minimum fraction of positive pairs per document; negatives are dropped to reach it. */
    private final double minClassImbalancedPerDocument;
    /** Labeled pairs per document, keyed by the id passed to {@link #process}. */
    private final Map<Integer, Map<Pair<Integer, Integer>, Boolean>> mentionPairs;
    /** Single seeded source of randomness shared by every sampling step. */
    private final Random random;

    /** Creates a builder that neither rebalances classes nor caps the number of examples. */
    public DatasetBuilder() {
        this(0.0, Integer.MAX_VALUE);
    }

    /**
     * @param minClassImbalancedPerDocument minimum ratio positives / (positives + negatives)
     *        to enforce per document by discarding negative pairs; 0 disables rebalancing
     * @param maxExamplesPerDocument maximum number of pairs to keep per document
     */
    public DatasetBuilder(double minClassImbalancedPerDocument, int maxExamplesPerDocument) {
        this.maxExamplesPerDocument = maxExamplesPerDocument;
        this.minClassImbalancedPerDocument = minClassImbalancedPerDocument;
        this.mentionPairs = new HashMap<>();
        this.random = new Random(0L);
    }

    /**
     * Labels, rebalances and caps the mention pairs of one document, then stores them
     * under {@code id}.
     *
     * @param id key under which the document's pairs are stored
     * @param document document supplying candidate pairs and gold clusters
     */
    @Override
    public void process(int id, Document document) {
        // NOTE(review): presumably every candidate pair starts out mapped to false -- confirm
        // against StatisticalCorefUtils.getUnlabeledMentionPairs.
        Map<Pair<Integer, Integer>, Boolean> labeledPairs = StatisticalCorefUtils.getUnlabeledMentionPairs(document);
        DatasetBuilder.markGoldPairs(document, labeledPairs);
        this.downsampleNegatives(labeledPairs);
        this.capExamples(labeledPairs);
        this.mentionPairs.put(id, labeledPairs);
    }

    /** Sets to {@code true} every existing candidate pair whose mentions share a gold cluster. */
    private static void markGoldPairs(Document document, Map<Pair<Integer, Integer>, Boolean> labeledPairs) {
        for (CorefCluster cluster : document.goldCorefClusters.values()) {
            List<Mention> clusterMentions = new ArrayList<Mention>(cluster.getCorefMentions());
            for (Mention first : clusterMentions) {
                for (Mention second : clusterMentions) {
                    Pair<Integer, Integer> pair = new Pair<Integer, Integer>(first.mentionID, second.mentionID);
                    // Only label pairs that are already candidates; never introduce new keys.
                    if (labeledPairs.containsKey(pair)) {
                        labeledPairs.put(pair, true);
                    }
                }
            }
        }
    }

    /** Removes randomly chosen negative pairs until the minimum positive ratio is met. */
    private void downsampleNegatives(Map<Pair<Integer, Integer>, Boolean> labeledPairs) {
        long numPositive = 0L;
        List<Pair<Integer, Integer>> negatives = new ArrayList<>();
        for (Map.Entry<Pair<Integer, Integer>, Boolean> entry : labeledPairs.entrySet()) {
            if (entry.getValue()) {
                ++numPositive;
            } else {
                negatives.add(entry.getKey());
            }
        }
        // 0/0 yields NaN, which compares false below, so an empty document is left alone.
        float positiveRatio = (float)numPositive / (float)(numPositive + (long)negatives.size());
        if (!((double)positiveRatio < this.minClassImbalancedPerDocument)) {
            return;
        }
        // A ratio above 1 is unreachable and would make this count negative; clamp it so
        // that all negatives are dropped instead of indexing the list below zero.
        int negativesToKeep = Math.max(0, (int)((double)numPositive / this.minClassImbalancedPerDocument - (double)numPositive));
        // Shuffle with the seeded generator so the retained subset is reproducible.
        Collections.shuffle(negatives, this.random);
        for (int i = negativesToKeep; i < negatives.size(); ++i) {
            labeledPairs.remove(negatives.get(i));
        }
    }

    /**
     * Drops randomly chosen mentions, together with all pairs in which they are the second
     * element, until at most {@code maxExamplesPerDocument} pairs remain.
     */
    private void capExamples(Map<Pair<Integer, Integer>, Boolean> labeledPairs) {
        Map<Integer, List<Integer>> mentionToCandidateAntecedents = new HashMap<>();
        for (Pair<Integer, Integer> pair : labeledPairs.keySet()) {
            mentionToCandidateAntecedents.computeIfAbsent(pair.second, key -> new ArrayList<>()).add(pair.first);
        }
        List<Integer> mentions = new ArrayList<>(mentionToCandidateAntecedents.keySet());
        // The emptiness check stops a negative cap from reaching Random.nextInt(0), which throws.
        while (labeledPairs.size() > this.maxExamplesPerDocument && !mentions.isEmpty()) {
            Integer mention = mentions.remove(this.random.nextInt(mentions.size()));
            for (Integer candidateAntecedent : mentionToCandidateAntecedents.get(mention)) {
                labeledPairs.remove(new Pair<Integer, Integer>(candidateAntecedent, mention));
            }
        }
    }

    /**
     * Serializes the pairs collected so far to {@code StatisticalCorefTrainer.datasetFile}.
     *
     * @throws Exception if writing the file fails
     */
    @Override
    public void finish() throws Exception {
        IOUtils.writeObjectToFile(this.mentionPairs, StatisticalCorefTrainer.datasetFile);
    }
}

