/*
 * Decompiled with CFR 0.152.
 */
package org.apache.gobblin.example.wikipedia;

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.gson.JsonElement;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.gobblin.configuration.SourceState;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.example.wikipedia.WikipediaExtractor;
import org.apache.gobblin.source.extractor.Extractor;
import org.apache.gobblin.source.extractor.Watermark;
import org.apache.gobblin.source.extractor.WatermarkInterval;
import org.apache.gobblin.source.extractor.extract.AbstractSource;
import org.apache.gobblin.source.extractor.extract.LongWatermark;
import org.apache.gobblin.source.workunit.Extract;
import org.apache.gobblin.source.workunit.WorkUnit;

public class WikipediaSource
extends AbstractSource<String, JsonElement> {
    public static final String ARTICLE_TITLE = "gobblin.wikipediaSource.workUnit.title";

    public List<WorkUnit> getWorkunits(SourceState state) {
        Map previousWorkUnits = state.getPreviousWorkUnitStatesByDatasetUrns();
        LinkedList titles = new LinkedList(Splitter.on((String)",").omitEmptyStrings().splitToList((CharSequence)state.getProp("source.page.titles")));
        HashMap prevHighWatermarks = Maps.newHashMap();
        for (Map.Entry entry : previousWorkUnits.entrySet()) {
            Iterable watermarks = Iterables.transform((Iterable)((Iterable)entry.getValue()), (Function)new Function<WorkUnitState, LongWatermark>(){

                public LongWatermark apply(WorkUnitState wus) {
                    return (LongWatermark)wus.getActualHighWatermark(LongWatermark.class);
                }
            });
            ArrayList arrayList = Lists.newArrayList((Iterable)(watermarks = Iterables.filter((Iterable)watermarks, (Predicate)Predicates.notNull())));
            if (arrayList.size() <= 0) continue;
            prevHighWatermarks.put(entry.getKey(), Collections.max(arrayList));
        }
        Extract extract = this.createExtract(Extract.TableType.SNAPSHOT_ONLY, state.getProp("extract.namespace"), "WikipediaOutput");
        ArrayList workUnits = Lists.newArrayList();
        for (String string : titles) {
            LongWatermark prevWatermark = prevHighWatermarks.containsKey(string) ? (LongWatermark)prevHighWatermarks.get(string) : new LongWatermark(-1L);
            prevHighWatermarks.remove(string);
            WorkUnit workUnit = WorkUnit.create((Extract)extract, (WatermarkInterval)new WatermarkInterval((Watermark)prevWatermark, (Watermark)new LongWatermark(-1L)));
            workUnit.setProp("dataset.urn", (Object)string);
            workUnits.add(workUnit);
        }
        for (Map.Entry entry : prevHighWatermarks.entrySet()) {
            WorkUnit workUnit = WorkUnit.create((Extract)extract, (WatermarkInterval)new WatermarkInterval((Watermark)entry.getValue(), (Watermark)entry.getValue()));
            workUnit.setProp("dataset.urn", entry.getKey());
            workUnits.add(workUnit);
        }
        return workUnits;
    }

    public Extractor<String, JsonElement> getExtractor(WorkUnitState state) throws IOException {
        return new WikipediaExtractor(state);
    }

    public void shutdown(SourceState state) {
    }
}

