/*
 * Decompiled with CFR 0.152.
 */
package org.apache.manifoldcf.agents.transformation.htmlextractor;

import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;
import java.util.List;
import org.apache.manifoldcf.crawler.system.Logging;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.parser.Parser;
import org.jsoup.safety.Whitelist;

/**
 * Helper that parses an HTML document with jsoup and extracts both its metadata
 * and a cleaned version of its content.
 */
public class JsoupProcessing {
    /** Whitelist value (and CSS selector) that designates the whole document body. */
    private static final String BODY = "body";

    /** Map key under which the cleaned document content is returned. */
    private static final String EXTRACTED_DOC_KEY = "extractedDoc";

    /** {@code <meta>} names that are stored under a key identical to the name. */
    private static final String[] PLAIN_META_NAMES = {"keywords", "description", "author"};

    /**
     * Suffixes of the {@code dcterms.*} {@code <meta>} names; each one is stored
     * under the key {@code dc_terms_<suffix>}.
     */
    private static final String[] DCTERMS_SUFFIXES = {
        "subject", "title", "creator", "description", "publisher", "contributor",
        "date", "type", "format", "language", "identifier"
    };

    /**
     * Parses the given HTML stream as UTF-8 and returns its metadata together with
     * the cleaned content of the selected part of the document.
     *
     * <p>The returned table contains:
     * <ul>
     *   <li>one entry per {@code <meta>} element, keyed by its raw {@code name}
     *       attribute (an empty string when the attribute is absent);</li>
     *   <li>{@code title}: the text of the {@code <title>} element(s), possibly empty;</li>
     *   <li>{@code keywords}, {@code description}, {@code author} and the
     *       {@code dc_terms_*} keys, taken from the first matching {@code <meta>};</li>
     *   <li>{@code extractedDoc}: the cleaned content.</li>
     * </ul>
     *
     * @param streamDoc the HTML document to read; it is not closed by this method
     * @param whitelist CSS selector of the element to keep; {@code "body"}, {@code null}
     *                  or a blank value keeps the whole body, as does a selector that
     *                  matches nothing
     * @param blacklist CSS selectors of elements to remove from the kept element;
     *                  may be {@code null}, and {@code null}/empty entries are ignored
     * @param stripHtml {@code true} to remove every tag from the content,
     *                  {@code false} to keep the tags allowed by {@code Whitelist.relaxed()}
     * @return the extracted metadata and content
     * @throws IOException if the stream cannot be read or parsed
     */
    public static Hashtable<String, String> extractTextAndMetadataHtmlDocument(InputStream streamDoc, String whitelist, List<String> blacklist, boolean stripHtml) throws IOException {
        Document doc = Jsoup.parse(streamDoc, "UTF-8", "");
        doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);

        Hashtable<String, String> metadata = new Hashtable<String, String>();

        // Generic pass: every <meta> is stored under its raw name attribute.
        for (Element meta : doc.select("meta")) {
            Logging.connectors.debug("Name: " + meta.attr("name") + " - Content: " + meta.attr("content"));
            metadata.put(meta.attr("name"), meta.attr("content"));
        }

        // select() never returns null, so the title entry is always written (possibly empty).
        metadata.put("title", doc.select("title").text());

        // Well-known names: the FIRST matching <meta> wins, overriding the generic pass above.
        for (String name : PLAIN_META_NAMES) {
            putFirstMetaContent(doc, metadata, name, name);
        }
        for (String suffix : DCTERMS_SUFFIXES) {
            putFirstMetaContent(doc, metadata, "dcterms." + suffix, "dc_terms_" + suffix);
        }

        Element docToKeep = selectContentRoot(doc, whitelist);

        if (blacklist != null) {
            for (String selector : blacklist) {
                // An empty selector is rejected by jsoup; there is nothing to remove for it anyway.
                if (selector == null || selector.isEmpty()) {
                    continue;
                }
                docToKeep.select(selector).remove();
            }
        }

        String html = docToKeep.html();
        String finalDoc;
        if (stripHtml) {
            finalDoc = Jsoup.clean(html, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
        } else {
            finalDoc = Jsoup.clean(html, Whitelist.relaxed());
        }
        finalDoc = Parser.unescapeEntities(finalDoc, true);
        metadata.put(EXTRACTED_DOC_KEY, finalDoc);
        return metadata;
    }

    /**
     * Stores the {@code content} attribute of the first {@code <meta name="metaName">}
     * element under {@code key}; does nothing when no such element exists.
     */
    private static void putFirstMetaContent(Document doc, Hashtable<String, String> metadata, String metaName, String key) {
        // Double quotes are used for every lookup so that all names are matched the same way.
        Element meta = doc.select("meta[name=\"" + metaName + "\"]").first();
        if (meta != null) {
            metadata.put(key, meta.attr("content"));
        }
    }

    /**
     * Resolves the element whose content must be kept: the first match of
     * {@code whitelist}, or the document body when the whitelist is {@code "body"},
     * {@code null}, blank, or matches nothing. Never returns {@code null}.
     */
    private static Element selectContentRoot(Document doc, String whitelist) {
        Element root = null;
        boolean wholeBody = whitelist == null || whitelist.trim().isEmpty() || BODY.equals(whitelist);
        if (!wholeBody) {
            // first() yields null when the selector matches nothing.
            root = doc.select(whitelist).first();
        }
        if (root == null) {
            root = doc.body();
        }
        if (root == null) {
            // No <body> available: fall back to the whole document rather than
            // failing with a NullPointerException further down.
            root = doc;
        }
        return root;
    }
}

