
from zipfile import ZIP_STORED, ZIP_DEFLATED
from pypy.rlib.streamio import open_file_as_stream
from pypy.rlib.rstruct.runpack import runpack
from pypy.rlib.rarithmetic import r_uint, intmask
from pypy.rpython.tool.rffi_platform import CompilationError
import os

try:
    from pypy.rlib import rzlib
except (ImportError, CompilationError):
    rzlib = None

# XXX hack to get crc32 to work
from pypy.module.binascii.interp_crc32 import crc_32_tab

rcrc_32_tab = [r_uint(i) for i in crc_32_tab]

def crc32(s, crc=0):
    result = 0
    crc = ~r_uint(crc) & r_uint(0xffffffffL)
    for c in s:
        crc = rcrc_32_tab[(crc ^ r_uint(ord(c))) & 0xffL] ^ (crc >> 8)
        #/* Note:  (crc >> 8) MUST zero fill on left

        result = crc ^ r_uint(0xffffffffL)

    return result

# parts copied from zipfile library implementation

class BadZipfile(Exception):
    pass

# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH"     # 9 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4HlLL2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

class EndRecStruct(object):
    def __init__(self, stuff, comment, filesize):
        self.stuff = stuff
        self.comment = comment
        self.filesize = filesize

def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""
    fpin.seek(-22, 2)               # Assume no archive comment.
    filesize = fpin.tell() + 22     # Get file size
    data = fpin.readall()
    start = len(data)-2
    assert start > 0
    if data[0:4] == stringEndArchive and data[start:] == "\000\000":
        endrec = runpack(structEndArchive, data)
        return EndRecStruct(endrec, "", filesize - 22)
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.readall()
    start = data.rfind(stringEndArchive)
    if start >= 0:     # Correct signature string was found
        endrec = runpack(structEndArchive, data[start:start+22])
        comment = data[start+22:]
        if endrec[7] == len(comment):     # Comment length checks out
            # Append the archive comment and start offset
            return EndRecStruct(endrec, comment, filesize - END_BLOCK + start)
    return      # Error, return None

class RZipInfo(object):
    def __init__(self, filename, date_time=(1980,1,1,0,0,0)):
        self.orig_filename = filename
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
# This is used to ensure paths in generated ZIP files always use
# forward slashes as the directory separator, as required by the
# ZIP format specification.
        if os.sep != "/":
            filename = filename.replace(os.sep, "/")
        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        self.create_system = 0          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

class RZipFile(object):
    def __init__(self, zipname, mode='r', compression=ZIP_STORED):
        if mode != 'r':
            raise TypeError("Read only support by now")
        self.compression = compression
        self.filename = zipname
        self.filelist = []
        self.NameToInfo = {}
        if 'b' not in mode:
            mode += 'b'
        self.mode = mode
        fp = self.get_fp()
        try:
            self._GetContents(fp)
        finally:
            fp.close()

    def get_fp(self):
        return open_file_as_stream(self.filename, self.mode, 1024)

    def _GetContents(self, fp):
        endrec = _EndRecData(fp)
        if not endrec:
            raise BadZipfile, "File is not a zip file"
        size_cd = endrec.stuff[5]             # bytes in central directory
        offset_cd = endrec.stuff[6]   # offset of central directory
        self.comment = endrec.comment
        x = endrec.filesize - size_cd
        concat = x - offset_cd
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile, "Bad magic number for central directory"
            centdir = runpack(structCentralDir, centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = RZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                crc, x.compress_size, x.file_size) = centdir[1:12]
            x.CRC = r_uint(crc) & r_uint(0xffffffff)
            x.dostime = t
            x.dosdate = d
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile, "Bad magic number for file header"
            fheader = runpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.orig_filename:
                raise BadZipfile, \
                      'File name in directory "%s" and header "%s" differ.' % (
                          data.orig_filename, fname)
        fp.seek(self.start_dir, 0)

    def getinfo(self, filename):
        """Return the instance of ZipInfo given 'filename'."""
        return self.NameToInfo[filename]

    def read(self, filename):
        zinfo = self.getinfo(filename)
        fp = self.get_fp()
        try:
            filepos = fp.tell()
            fp.seek(zinfo.file_offset, 0)
            bytes = fp.read(intmask(zinfo.compress_size))
            fp.seek(filepos, 0)
            if zinfo.compress_type == ZIP_STORED:
                pass
            elif zinfo.compress_type == ZIP_DEFLATED and rzlib is not None:
                stream = rzlib.inflateInit(wbits=-15)
                try:
                    bytes, _, _ = rzlib.decompress(stream, bytes)
                    # need to feed in unused pad byte so that zlib won't choke
                    ex, _, _ = rzlib.decompress(stream, 'Z')
                    if ex:
                        bytes = bytes + ex
                finally:
                    rzlib.inflateEnd(stream)
            elif zinfo.compress_type == ZIP_DEFLATED:
                raise BadZipfile, \
                      "Cannot decompress file, zlib not installed"
            else:
                raise BadZipfile, \
                      "Unsupported compression method %d for file %s" % \
                (zinfo.compress_type, filename)
            crc = crc32(bytes)
            if crc != zinfo.CRC:
                raise BadZipfile, "Bad CRC-32 for file %s" % filename
            return bytes
        finally:
            fp.close()
