#!/usr/bin/env python
#
# bughelper-doc -- BugHelper's wiki documentation retrieval tool
#
# Copyright (C) 2007  Alex Muntada <ubuntu@alexm.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

import urllib2
import re

# Root of the wiki; page names and query strings are appended to it.
baseURL = "https://wiki.ubuntu.com/"
# Query string appended to baseURL to obtain the search results page that
# getBugHelperDocPages scans for doc page links.
actionSearch = "?action=fullsearch&value=BugHelper&titlesearch=Titles"
# Query string appended to a page URL to request it with action=raw.
actionRaw = "?action=raw"
# Action actually used when downloading individual pages.
wikiAction = actionRaw
# Extension appended to each doc page name when it is written to disk.
fileExt = ".txt"


def openURL(url):
    """Fetch url and return the full response body.

    The request is sent with a "bughelper-doc/0.1" User-agent header.
    Any exception raised by urllib2 while opening or reading the URL
    propagates to the caller.
    """
    req = urllib2.Request(url)
    version = "bughelper-doc/0.1"
    req.add_header('User-agent', version)
    sock = urllib2.urlopen(req)
    # Close the connection even when read() raises, so a failed transfer
    # does not leak the socket.
    try:
        content = sock.read()
    finally:
        sock.close()
    return content


def stripHeaderFooter(text):
    """Return text with the wiki page header and footer removed.

    Header: everything from the start of the text through the first "||"
    that follows the last "[[TableOfContents", plus any CRLF line breaks
    immediately after it, is deleted.

    Footer: a line starting with "----" that is followed by a line
    starting with "Go back to ", together with any CRLF line breaks before
    it and everything after it, is replaced by a single CRLF.

    Text containing neither marker is returned unchanged.
    """
    flags = re.MULTILINE | re.DOTALL
    header = re.compile(".*\\[\\[TableOfContents.*?\\|\\|(?:\r\n)*", flags)
    footer = re.compile("(?:\r\n)*^----\r\nGo back to .*", flags)
    return footer.sub("\r\n", header.sub("", text))


def bugHelperReadme():
    """Download the BugHelper wiki page and return its cleaned text.

    The page is requested with the configured wikiAction, and its header
    and footer are removed by stripHeaderFooter.
    """
    readmeURL = "%sBugHelper%s" % (baseURL, wikiAction)
    return stripHeaderFooter(openURL(readmeURL))


def getBugHelperDocPages(contents):
    """Fetch every BugHelper doc page listed by the wiki search.

    The search results page is scanned for links of the form
    /BugHelper/doc/<name>?highlight=...  Each matching page is downloaded,
    stripped of its header and footer, and stored in contents under the
    key "doc/<name>".  The contents mapping is modified in place and
    nothing is returned.
    """
    searchResults = openURL(baseURL + actionSearch)
    linkPattern = re.compile("href=\"/BugHelper/(doc/[^?]+)\\?highlight=",
                             re.MULTILINE)
    for docPage in linkPattern.findall(searchResults):
        pageURL = "%sBugHelper/%s%s" % (baseURL, docPage, wikiAction)
        contents[docPage] = stripHeaderFooter(openURL(pageURL))


def _writeFile(path, content):
    """Write content to path, closing the file even if the write fails."""
    # Named "out" rather than "file" to avoid shadowing the builtin.
    out = open(path, 'w')
    try:
        out.write(content)
    finally:
        out.close()


# Save the main BugHelper wiki page as the README.
readme = bugHelperReadme()
_writeFile("doc/README", readme)

# Save each doc page to a file named after its wiki page name plus fileExt.
docs = {}
getBugHelperDocPages(docs)
for pathName, text in docs.items():
    _writeFile(pathName + fileExt, text)

