/*
    BFilter - a smart ad-filtering web proxy
    Copyright (C) 2002-2006  Joseph Artsimovich <joseph_a@mail.ru>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "pch.h"

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "HtmlContentValidator.h"
#include "HtmlDetector.h"
#include "SplittableBuffer.h"
#include "HttpResponseMetadata.h"
#include "FilterGroupTag.h"

using namespace std;

// Names of the flags that setFlags() raises on the filter chain:
// one specific to the document flavour (HTML or XHTML) and one
// that is raised for either flavour.
const std::string HtmlContentValidator::m_sHtmlFlag = "_HTML_";
const std::string HtmlContentValidator::m_sXhtmlFlag = "_XHTML_";
const std::string HtmlContentValidator::m_sHtmlOrXhtmlFlag = "_HTML_OR_XHTML_";

// Constructs the validator as a member of the given filter chain.
//   chain - forwarded to the ResponseFilterBase constructor.
//   xhtml - remembered in m_isXhtml; setFlags() uses it to choose
//           between the XHTML and the HTML flag.
HtmlContentValidator::HtmlContentValidator(ResponseFilterChain& chain, bool xhtml)
	: ResponseFilterBase(chain)
	, m_isXhtml(xhtml)
{
	// Nothing else to set up.
}

// No explicit cleanup is performed here.
HtmlContentValidator::~HtmlContentValidator() {}

void
HtmlContentValidator::processMetadata(auto_ptr<HttpResponseMetadata> metadata)
{
	// Content-Type is checked in FilterTryList::tryHtmlContentValidator()
	m_ptrMetadata = metadata;
	// We could output the metadata right now, but that would cause the
	// immediate creation of the next filter in chain, which would not
	// see the flags we are going to set here.
}

// Examines body data to decide whether the HTML/XHTML flags should be
// set, then releases the held metadata followed by the body data.
//   data - the next portion of the response body.
//   eof  - passed through to the detector and to outputBodyData().
void
HtmlContentValidator::processBodyData(SplittableBuffer& data, bool eof)
{
	// The metadata pointer is handed to outputMetadata() once a
	// decision is made; when it is null there is nothing left to
	// decide and body data is simply forwarded.
	if (!m_ptrMetadata.get()) {
		outputBodyData(data, eof);
		return;
	}

	if (!m_ptrHtmlDetector.get()) {
		// Fast path for the common case: the very first byte is '<',
		// so the flags are set without running the detector at all.
		bool const starts_with_angle = !data.empty() && *data.begin() == '<';
		if (starts_with_angle) {
			setFlags();
			outputMetadata(m_ptrMetadata);
			outputBodyData(data, eof);
			return;
		}

		m_ptrHtmlDetector.reset(new HtmlDetector);
	}

	m_ptrHtmlDetector->consume(data, eof);
	HtmlDetector::Status const verdict = m_ptrHtmlDetector->getStatus();

	if (verdict == HtmlDetector::HTML_NOT_DETECTED) {
		onResult(false, eof);
	} else if (verdict == HtmlDetector::HTML_DETECTED
			|| verdict == HtmlDetector::EMPTY_DOC) {
		onResult(true, eof);
	} else if (verdict == HtmlDetector::IN_PROGRESS
			&& m_ptrHtmlDetector->bufferedData().size() > MAX_BUFFERED_SIZE) {
		// Still undecided, but too much data has been buffered:
		// stop waiting and report the content as detected.
		onResult(true, eof);
	}
	// Otherwise the detector keeps the data buffered and nothing is
	// output until a later call.
	// NOTE(review): if consume() can leave the status IN_PROGRESS when
	// eof is true, the metadata and buffered data would never be
	// output -- confirm that HtmlDetector always resolves on eof.
}

void
HtmlContentValidator::setFlags()
{
	WildcardFilterGroupTag tag;
	getFilterChain().setFlag(tag, m_isXhtml ? m_sXhtmlFlag : m_sHtmlFlag);
	getFilterChain().setFlag(tag, m_sHtmlOrXhtmlFlag);
}

void
HtmlContentValidator::onResult(bool detected, bool eof)
{
	if (detected) {
		setFlags();
	}
	SplittableBuffer data;
	m_ptrHtmlDetector->reset(data);
	m_ptrHtmlDetector.reset(0);
	outputMetadata(m_ptrMetadata);
	outputBodyData(data, eof);
}
