/*
 * Decompiled with CFR 0.152.
 */
package org.apache.gobblin.example.wikipedia;

import com.google.common.base.Charsets;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.io.Closer;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Queue;
import org.apache.commons.lang3.StringUtils;
import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.http.HttpClientConfigurator;
import org.apache.gobblin.http.HttpClientConfiguratorLoader;
import org.apache.gobblin.source.extractor.DataRecordException;
import org.apache.gobblin.source.extractor.Extractor;
import org.apache.gobblin.source.extractor.Watermark;
import org.apache.gobblin.source.extractor.extract.LongWatermark;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.message.BasicNameValuePair;
import org.joda.time.DateTime;
import org.joda.time.Period;
import org.joda.time.ReadableInstant;
import org.joda.time.ReadablePeriod;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * An {@link Extractor} that pulls revision metadata for a single Wikipedia page through the
 * MediaWiki query API and emits each revision as a {@link JsonElement}.
 *
 * <p>The page title is read from the {@code dataset.urn} property of the work unit state. On
 * construction the extractor looks up the latest revision id of the page (the upper bound of the
 * pull) and the low watermark of the work unit (the lower bound). When the low watermark is
 * negative, a lower bound is derived from a look-back period instead
 * (see {@link #BOOTSTRAP_PERIOD}). Revisions are then fetched lazily in small batches as
 * {@link #readRecord(JsonElement)} is called.
 */
public class WikipediaExtractor implements Extractor<String, JsonElement> {
    private static final Logger LOG = LoggerFactory.getLogger(WikipediaExtractor.class);

    /**
     * Compact timestamp format sent as {@code rvstart}. The formatter is pinned to UTC because the
     * pattern carries no zone information and the MediaWiki API interprets such timestamps as UTC;
     * printing in the JVM default zone would shift the bootstrap window on non-UTC hosts.
     */
    private static final DateTimeFormatter WIKIPEDIA_TIMESTAMP_FORMAT =
            DateTimeFormat.forPattern("YYYYMMddHHmmss").withZoneUTC();

    public static final String CONFIG_PREFIX = "gobblin.wikipediaSource.";
    /** Property holding the maximum number of revisions emitted per page; negative means no cap. */
    public static final String MAX_REVISION_PER_PAGE = CONFIG_PREFIX + "maxRevisionsPerPage";
    public static final int DEFAULT_MAX_REVISIONS_PER_PAGE = -1;
    /** Prefix of the properties handed to the {@link HttpClientConfigurator}. */
    public static final String HTTP_CLIENT_CONFIG_PREFIX = CONFIG_PREFIX + "httpClient.";
    public static final String SOURCE_PAGE_TITLES = "source.page.titles";
    /** Property holding the look-back period (parsed with {@link Period#parse(String)}) used for bootstrap. */
    public static final String BOOTSTRAP_PERIOD = "wikipedia.source.bootstrap.lookback";
    public static final String DEFAULT_BOOTSTRAP_PERIOD = "P2D";
    public static final String WIKIPEDIA_API_ROOTURL = "wikipedia.api.rooturl";
    public static final String WIKIPEDIA_AVRO_SCHEMA = "wikipedia.avro.schema";

    private static final String JSON_MEMBER_QUERY = "query";
    private static final String JSON_MEMBER_PAGES = "pages";
    private static final String JSON_MEMBER_REVISIONS = "revisions";
    private static final String JSON_MEMBER_PAGEID = "pageid";
    private static final String JSON_MEMBER_TITLE = "title";
    private static final Gson GSON = new Gson();

    private final WikiResponseReader reader;
    private final String rootUrl;
    private final String schema;
    private final String requestedTitle;
    private final int batchSize;
    private final long lastRevisionId;
    private Queue<JsonElement> currentBatch;
    private final ImmutableMap<String, String> baseQuery;
    private final WorkUnitState workUnitState;
    private final int maxRevisionsPulled;
    private final HttpClientConfigurator httpClientConfigurator;
    /** Created lazily on the first HTTP query, see {@link #performHttpQuery(String, Map)}. */
    private HttpClient httpClient;

    /**
     * Builds the extractor and determines the revision range to pull.
     *
     * <p>This constructor performs HTTP requests: one to find the latest revision id of the page
     * and, when the work unit's low watermark is negative, one more to find the bootstrap revision.
     * The actual high watermark of {@code workUnitState} is set to the latest revision id.
     *
     * @param workUnitState state carrying the configuration and watermarks for this pull
     * @throws IOException if the latest revision cannot be retrieved or the request URI is invalid
     */
    public WikipediaExtractor(WorkUnitState workUnitState) throws IOException {
        this.workUnitState = workUnitState;
        this.rootUrl = readProp(WIKIPEDIA_API_ROOTURL, workUnitState);
        this.schema = readProp(WIKIPEDIA_AVRO_SCHEMA, workUnitState);
        this.batchSize = 5;
        this.requestedTitle = workUnitState.getProp("dataset.urn");
        this.baseQuery = ImmutableMap.<String, String>builder()
                .put("format", "json")
                .put("action", JSON_MEMBER_QUERY)
                .put("prop", JSON_MEMBER_REVISIONS)
                .build();

        HttpClientConfiguratorLoader httpClientConfiguratorLoader = new HttpClientConfiguratorLoader(workUnitState);
        this.httpClientConfigurator = httpClientConfiguratorLoader.getConfigurator();
        this.httpClientConfigurator.setStatePropertiesPrefix(HTTP_CLIENT_CONFIG_PREFIX).configure(workUnitState);

        try {
            Queue<JsonElement> lastRevision = retrievePageRevisions(
                    ImmutableMap.<String, String>builder()
                            .putAll(this.baseQuery)
                            .put("rvprop", "ids")
                            .put("titles", this.requestedTitle)
                            .put("rvlimit", "1")
                            .build());
            // -1 marks "no revision found"; the reader then has nothing to pull.
            this.lastRevisionId = lastRevision.isEmpty() ? -1L : parseRevision(lastRevision.poll());
        } catch (URISyntaxException use) {
            throw new IOException(use);
        }

        long baseRevision = workUnitState.getWorkunit().getLowWatermark(LongWatermark.class, GSON).getValue();
        if (baseRevision < 0L) {
            try {
                baseRevision = createLowWatermarkForBootstrap(workUnitState);
            } catch (IOException ioe) {
                // Best effort: without a bootstrap revision, start from the latest one so nothing is pulled.
                LOG.warn("Could not determine the bootstrap revision for page " + this.requestedTitle
                        + ", falling back to the latest revision " + this.lastRevisionId + ".", ioe);
                baseRevision = this.lastRevisionId;
            }
        }

        this.reader = new WikiResponseReader(baseRevision);
        workUnitState.setActualHighWatermark(new LongWatermark(this.lastRevisionId));
        this.currentBatch = new LinkedList<JsonElement>();
        LOG.info("Will pull revisions {} to {} for page {}.",
                this.reader.lastPulledRevision, this.lastRevisionId, this.requestedTitle);
        this.maxRevisionsPulled = workUnitState.getPropAsInt(MAX_REVISION_PER_PAGE, DEFAULT_MAX_REVISIONS_PER_PAGE);
    }

    /** Returns the {@code revid} member of a revision object as a long. */
    private long parseRevision(JsonElement element) {
        return element.getAsJsonObject().get("revid").getAsLong();
    }

    /**
     * Finds the id of the first revision at or after "now minus the bootstrap period".
     *
     * @param state state from which {@link #BOOTSTRAP_PERIOD} is read (default {@link #DEFAULT_BOOTSTRAP_PERIOD})
     * @return the revision id to use as the starting point of the pull
     * @throws IOException if the query fails, the URI is invalid, or no revision is returned
     */
    private long createLowWatermarkForBootstrap(WorkUnitState state) throws IOException {
        String bootstrapPeriodString = state.getProp(BOOTSTRAP_PERIOD, DEFAULT_BOOTSTRAP_PERIOD);
        Period period = Period.parse(bootstrapPeriodString);
        DateTime startTime = DateTime.now().minus(period);
        try {
            Queue<JsonElement> firstRevision = retrievePageRevisions(
                    ImmutableMap.<String, String>builder()
                            .putAll(this.baseQuery)
                            .put("rvprop", "ids")
                            .put("titles", this.requestedTitle)
                            .put("rvlimit", "1")
                            .put("rvstart", WIKIPEDIA_TIMESTAMP_FORMAT.print(startTime))
                            .put("rvdir", "newer")
                            .build());
            if (firstRevision.isEmpty()) {
                throw new IOException("Could not retrieve oldest revision, returned empty revisions list.");
            }
            return parseRevision(firstRevision.poll());
        } catch (URISyntaxException use) {
            throw new IOException(use);
        }
    }

    /**
     * Looks up {@code key} in the work unit, then the work unit state, then the job state, returning
     * the first non-blank value (or whatever the job state returns if all are blank).
     */
    private String readProp(String key, WorkUnitState workUnitState) {
        String value = workUnitState.getWorkunit().getProp(key);
        if (StringUtils.isBlank(value)) {
            value = workUnitState.getProp(key);
        }
        if (StringUtils.isBlank(value)) {
            value = workUnitState.getJobState().getProp(key);
        }
        return value;
    }

    /**
     * Sends a GET request built from {@code rootUrl} and {@code query} and parses the body as JSON.
     *
     * @return the parsed response, or an empty {@link JsonObject} when the body is empty
     * @throws URISyntaxException if the request URI cannot be built
     * @throws IOException if the request fails or the body cannot be read
     */
    private JsonElement performHttpQuery(String rootUrl, Map<String, String> query)
            throws URISyntaxException, IOException {
        if (null == this.httpClient) {
            this.httpClient = createHttpClient();
        }
        HttpUriRequest req = createHttpRequest(rootUrl, query);
        Closer closer = Closer.create();
        StringBuilder sb = new StringBuilder();
        try {
            HttpResponse response = sendHttpRequest(req, this.httpClient);
            if (response instanceof CloseableHttpResponse) {
                closer.register((CloseableHttpResponse) response);
            }
            BufferedReader br = closer.register(new BufferedReader(
                    new InputStreamReader(response.getEntity().getContent(), ConfigurationKeys.DEFAULT_CHARSET_ENCODING)));
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append('\n');
            }
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            try {
                closer.close();
            } catch (IOException e) {
                // A failure while closing is logged only; it must not mask the outcome of the query.
                LOG.error("IOException in Closer.close() while performing query " + req + ": " + e, e);
            }
        }
        if (sb.length() == 0) {
            LOG.warn("Received empty response for query: " + req);
            return new JsonObject();
        }
        return GSON.fromJson(sb.toString(), JsonElement.class);
    }

    /**
     * Builds the request URI by URL-encoding {@code query} (UTF-8) and setting it as the query part
     * of {@code rootUrl}. Parameters appear in the iteration order of {@code query}.
     *
     * @throws URISyntaxException if {@code rootUrl} or the resulting URI is not a valid URI
     */
    public static URI createRequestURI(String rootUrl, Map<String, String> query)
            throws MalformedURLException, URISyntaxException {
        ArrayList<BasicNameValuePair> queryTokens = Lists.newArrayList();
        for (Map.Entry<String, String> entry : query.entrySet()) {
            queryTokens.add(new BasicNameValuePair(entry.getKey(), entry.getValue()));
        }
        String encodedQuery = URLEncodedUtils.format(queryTokens, Charsets.UTF_8);
        return new URIBuilder(rootUrl).setQuery(encodedQuery).build();
    }

    /** Creates the GET request for {@code rootUrl} with the given query parameters. */
    HttpUriRequest createHttpRequest(String rootUrl, Map<String, String> query)
            throws MalformedURLException, URISyntaxException {
        return new HttpGet(createRequestURI(rootUrl, query));
    }

    /**
     * Executes {@code req} and returns the response only if it has status 200 and an entity.
     *
     * @throws IOException if the request fails, the status is not 200, or the response has no entity;
     *     a {@link CloseableHttpResponse} is closed before the exception is thrown
     */
    HttpResponse sendHttpRequest(HttpUriRequest req, HttpClient httpClient)
            throws ClientProtocolException, IOException {
        LOG.debug("Sending request {}", req);
        HttpResponse response = httpClient.execute(req);
        if (response.getStatusLine().getStatusCode() != 200 || null == response.getEntity()) {
            if (response instanceof CloseableHttpResponse) {
                ((CloseableHttpResponse) response).close();
            }
            throw new IOException("HTTP Request " + req + " returned unexpected response " + response);
        }
        return response;
    }

    /**
     * Runs {@code query} and extracts the revisions of the first page in the response
     * ({@code query.pages.<first key>.revisions}). The page's {@code pageid} and {@code title},
     * when present, are copied into every revision object.
     *
     * @return the revisions in response order; empty when any expected member is missing
     */
    private Queue<JsonElement> retrievePageRevisions(Map<String, String> query)
            throws IOException, URISyntaxException {
        LinkedList<JsonElement> retrievedRevisions = new LinkedList<JsonElement>();
        JsonElement jsonElement = performHttpQuery(this.rootUrl, query);
        if (jsonElement == null || !jsonElement.isJsonObject()) {
            return retrievedRevisions;
        }
        JsonObject jsonObj = jsonElement.getAsJsonObject();
        if (jsonObj == null || !jsonObj.has(JSON_MEMBER_QUERY)) {
            return retrievedRevisions;
        }
        JsonObject queryObj = jsonObj.getAsJsonObject(JSON_MEMBER_QUERY);
        if (!queryObj.has(JSON_MEMBER_PAGES)) {
            return retrievedRevisions;
        }
        JsonObject pagesObj = queryObj.getAsJsonObject(JSON_MEMBER_PAGES);
        if (pagesObj.entrySet().isEmpty()) {
            return retrievedRevisions;
        }
        // Only one title is requested per query, so only the first page entry is inspected.
        String firstPageKey = pagesObj.entrySet().iterator().next().getKey();
        JsonObject pageIdObj = pagesObj.getAsJsonObject(firstPageKey);
        if (!pageIdObj.has(JSON_MEMBER_REVISIONS)) {
            return retrievedRevisions;
        }
        JsonArray jsonArr = pageIdObj.getAsJsonArray(JSON_MEMBER_REVISIONS);
        for (JsonElement revElement : jsonArr) {
            JsonObject revObj = revElement.getAsJsonObject();
            if (pageIdObj.has(JSON_MEMBER_PAGEID)) {
                revObj.add(JSON_MEMBER_PAGEID, pageIdObj.get(JSON_MEMBER_PAGEID));
            }
            if (pageIdObj.has(JSON_MEMBER_TITLE)) {
                revObj.add(JSON_MEMBER_TITLE, pageIdObj.get(JSON_MEMBER_TITLE));
            }
            retrievedRevisions.add(revObj);
        }
        LOG.info("{} record(s) retrieved for title {}", retrievedRevisions.size(), this.requestedTitle);
        return retrievedRevisions;
    }

    /** Creates the HTTP client from the configurator; overridable, e.g. to substitute a client. */
    protected HttpClient createHttpClient() {
        return this.httpClientConfigurator.createClient();
    }

    /** Closes the HTTP client if one was created and it is {@link Closeable}. */
    @Override
    public void close() throws IOException {
        if (this.httpClient instanceof Closeable) {
            ((Closeable) this.httpClient).close();
        }
    }

    /** Returns the value read from {@link #WIKIPEDIA_AVRO_SCHEMA}. */
    @Override
    public String getSchema() {
        return this.schema;
    }

    /**
     * Returns the next revision, fetching a new batch over HTTP when needed.
     *
     * @param reuse ignored
     * @return the next revision, or {@code null} when there is nothing more to pull
     */
    @Override
    public JsonElement readRecord(@Deprecated JsonElement reuse) throws DataRecordException, IOException {
        if (this.reader == null) {
            return null;
        }
        return this.reader.hasNext() ? this.reader.next() : null;
    }

    /** Always returns 0; the number of revisions is not computed up front. */
    @Override
    public long getExpectedRecordCount() {
        return 0L;
    }

    /** Returns the latest revision id of the page as looked up at construction time. */
    @Override
    public long getHighWatermark() {
        return this.lastRevisionId;
    }

    /**
     * Iterator over the revisions after {@code lastPulledRevision} up to the extractor's
     * {@code lastRevisionId}, refilling the extractor's {@code currentBatch} on demand.
     */
    private class WikiResponseReader implements Iterator<JsonElement> {
        /** Id of the last revision handed out (initially the lower bound of the pull). */
        private long lastPulledRevision;
        private long revisionsPulled = 0L;

        public WikiResponseReader(long latestPulledRevision) {
            this.lastPulledRevision = latestPulledRevision;
        }

        /**
         * Reports whether another revision is available, issuing an HTTP query when the current
         * batch is exhausted. Whenever the pull stops before reaching {@code lastRevisionId}
         * (revision cap reached or query failure), the actual high watermark is moved back to the
         * last revision actually handed out so the remaining revisions are not recorded as pulled.
         */
        @Override
        public boolean hasNext() {
            if (WikipediaExtractor.this.maxRevisionsPulled > -1
                    && this.revisionsPulled >= (long) WikipediaExtractor.this.maxRevisionsPulled) {
                WikipediaExtractor.this.workUnitState.setActualHighWatermark(new LongWatermark(this.lastPulledRevision));
                LOG.info("Pulled max number of records {}, final revision pulled {}.",
                        this.revisionsPulled, this.lastPulledRevision);
                return false;
            }
            if (!WikipediaExtractor.this.currentBatch.isEmpty()) {
                return true;
            }
            if (this.lastPulledRevision >= WikipediaExtractor.this.lastRevisionId) {
                return false;
            }
            try {
                WikipediaExtractor.this.currentBatch = WikipediaExtractor.this.retrievePageRevisions(
                        ImmutableMap.<String, String>builder()
                                .putAll(WikipediaExtractor.this.baseQuery)
                                .put("rvprop", "ids|timestamp|user|userid|size")
                                .put("titles", WikipediaExtractor.this.requestedTitle)
                                .put("rvlimit", Integer.toString(WikipediaExtractor.this.batchSize + 1))
                                .put("rvstartid", Long.toString(this.lastPulledRevision))
                                .put("rvendid", Long.toString(WikipediaExtractor.this.lastRevisionId))
                                .put("rvdir", "newer")
                                .build());
                // One extra revision is requested and the head of the batch is dropped:
                // NOTE(review): presumably because rvstartid is inclusive, so the head is the
                // revision already pulled - confirm against the MediaWiki API.
                WikipediaExtractor.this.currentBatch.poll();
            } catch (IOException | URISyntaxException e) {
                LOG.error("Could not retrieve more revisions.", e);
                // The constructor optimistically set the high watermark to lastRevisionId; since the
                // pull stops here, record only what was actually pulled.
                WikipediaExtractor.this.workUnitState.setActualHighWatermark(new LongWatermark(this.lastPulledRevision));
                return false;
            }
            return !WikipediaExtractor.this.currentBatch.isEmpty();
        }

        /**
         * Returns the next revision and advances {@code lastPulledRevision} to its id.
         *
         * @throws NoSuchElementException if {@link #hasNext()} is {@code false}
         */
        @Override
        public JsonElement next() {
            if (!this.hasNext()) {
                throw new NoSuchElementException();
            }
            JsonElement element = WikipediaExtractor.this.currentBatch.poll();
            this.lastPulledRevision = WikipediaExtractor.this.parseRevision(element);
            ++this.revisionsPulled;
            return element;
        }

        /** Not supported. */
        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}

