{-# LANGUAGE CPP, BangPatterns, PatternGuards #-}
{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  Codec.Archive.Tar.Index
-- Copyright   :  (c) 2010-2015 Duncan Coutts
-- License     :  BSD3
--
-- Maintainer  :  duncan@community.haskell.org
-- Portability :  portable
--
-- Random access to the content of a @.tar@ archive.
--
-- This module uses common names and so is designed to be imported qualified:
--
-- > import qualified Codec.Archive.Tar.Index as TarIndex
--
-----------------------------------------------------------------------------
module Codec.Archive.Tar.Index (

    -- | The @tar@ format does not contain an index of files within the
    -- archive. Normally, @tar@ file have to be processed linearly. It is
    -- sometimes useful however to be able to get random access to files
    -- within the archive.
    --
    -- This module provides an index of a @tar@ file. A linear pass of the
    -- @tar@ file is needed to 'build' the 'TarIndex', but thereafter you can
    -- 'lookup' paths in the @tar@ file, and then use 'hReadEntry' to
    -- seek to the right part of the file and read the entry.
    --
    -- An index cannot be used to lookup 'Directory' entries in a tar file;
    -- instead, you will get 'TarDir' entry listing all the entries in the
    -- directory.

    -- * Index type
    TarIndex,

    -- * Index lookup
    lookup,
    TarIndexEntry(..),
    toList,

    -- ** I\/O operations
    TarEntryOffset,
    hReadEntry,
    hReadEntryHeader,

    -- * Index construction
    build,
    -- ** Incremental construction
    -- $incremental-construction
    IndexBuilder,
    empty,
    addNextEntry,
    skipNextEntry,
    finalise,
    unfinalise,

    -- * Serialising indexes
    serialise,
    deserialise,

    -- * Lower level operations with offsets and I\/O on tar files
    hReadEntryHeaderOrEof,
    hSeekEntryOffset,
    hSeekEntryContentOffset,
    hSeekEndEntryOffset,
    nextEntryOffset,
    indexEndEntryOffset,
    indexNextEntryOffset,

    -- * Deprecated aliases
    emptyIndex,
    finaliseIndex,

#ifdef TESTS
    prop_lookup,
    prop_toList,
    prop_valid,
    prop_serialise_deserialise,
    prop_serialiseSize,
    prop_index_matches_tar,
    prop_finalise_unfinalise,
#endif
  ) where

import Data.Typeable (Typeable)

import Codec.Archive.Tar.Types as Tar
import Codec.Archive.Tar.Read  as Tar
import qualified Codec.Archive.Tar.Index.StringTable as StringTable
import Codec.Archive.Tar.Index.StringTable (StringTable, StringTableBuilder)
import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie
import Codec.Archive.Tar.Index.IntTrie (IntTrie, IntTrieBuilder)

import qualified System.FilePath.Posix as FilePath
import Data.Monoid (Monoid(..))
#if (MIN_VERSION_base(4,5,0))
import Data.Monoid ((<>))
#endif
import Data.Word
import Data.Int
import Data.Bits
import qualified Data.Array.Unboxed as A
import Prelude hiding (lookup)
import System.IO
import Control.Exception (assert, throwIO)
import Control.DeepSeq

import qualified Data.ByteString        as BS
import qualified Data.ByteString.Char8  as BS.Char8
import qualified Data.ByteString.Lazy   as LBS
import qualified Data.ByteString.Unsafe as BS
#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
import Data.ByteString.Builder          as BS
import Data.ByteString.Builder.Extra    as BS (toLazyByteStringWith,
                                               untrimmedStrategy)
#else
import Data.ByteString.Lazy.Builder     as BS
import Data.ByteString.Lazy.Builder.Extras as BS (toLazyByteStringWith,
                                                  untrimmedStrategy)
#endif

#ifdef TESTS
import qualified Prelude
import Test.QuickCheck
import Test.QuickCheck.Property (ioProperty)
import Control.Applicative ((<$>), (<*>))
import Control.Monad (unless)
import Data.List (nub, sort, sortBy, stripPrefix, isPrefixOf)
import Data.Maybe
import Data.Function (on)
import Control.Exception (SomeException, try)
import Codec.Archive.Tar.Write          as Tar
import qualified Data.ByteString.Handle as HBS
#endif


-- | An index of the entries in a tar file.
--
-- This index type is designed to be quite compact and suitable to store either
-- on disk or in memory.
--
data TarIndex = TarIndex

  -- As an example of how the mapping works, consider these example files:
  --   "foo/bar.hs" at offset 0
  --   "foo/baz.hs" at offset 1024
  --
  -- We split the paths into components and enumerate them.
  --   { "foo" -> TokenId 0, "bar.hs" -> TokenId 1,  "baz.hs" -> TokenId 2 }
  --
  -- We convert paths into sequences of 'TokenId's, i.e.
  --   "foo/bar.hs" becomes [PathComponentId 0, PathComponentId 1]
  --   "foo/baz.hs" becomes [PathComponentId 0, PathComponentId 2]
  --
  -- We use a trie mapping sequences of 'PathComponentId's to the entry offset:
  --  { [PathComponentId 0, PathComponentId 1] -> offset 0
  --  , [PathComponentId 0, PathComponentId 2] -> offset 1024 }

  -- The mapping of filepath components as strings to ids.
  {-# UNPACK #-} !(StringTable PathComponentId)

  -- Mapping of sequences of filepath component ids to tar entry offsets.
  {-# UNPACK #-} !(IntTrie PathComponentId TarEntryOffset)

  -- The offset immediatly after the last entry, where we would append any
  -- additional entries.
  {-# UNPACK #-} !TarEntryOffset

  deriving (TarIndex -> TarIndex -> Bool
(TarIndex -> TarIndex -> Bool)
-> (TarIndex -> TarIndex -> Bool) -> Eq TarIndex
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TarIndex -> TarIndex -> Bool
$c/= :: TarIndex -> TarIndex -> Bool
== :: TarIndex -> TarIndex -> Bool
$c== :: TarIndex -> TarIndex -> Bool
Eq, Int -> TarIndex -> ShowS
[TarIndex] -> ShowS
TarIndex -> String
(Int -> TarIndex -> ShowS)
-> (TarIndex -> String) -> ([TarIndex] -> ShowS) -> Show TarIndex
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TarIndex] -> ShowS
$cshowList :: [TarIndex] -> ShowS
show :: TarIndex -> String
$cshow :: TarIndex -> String
showsPrec :: Int -> TarIndex -> ShowS
$cshowsPrec :: Int -> TarIndex -> ShowS
Show, Typeable)

instance NFData TarIndex where
  rnf :: TarIndex -> ()
rnf (TarIndex StringTable PathComponentId
_ IntTrie PathComponentId Word32
_ Word32
_) = () -- fully strict by construction

-- | The result of 'lookup' in a 'TarIndex'. It can either be a file directly,
-- or a directory entry containing further entries (and all subdirectories
-- recursively). Note that the subtrees are constructed lazily, so it's
-- cheaper if you don't look at them.
--
data TarIndexEntry = TarFileEntry {-# UNPACK #-} !TarEntryOffset
                   | TarDir [(FilePath, TarIndexEntry)]
  deriving (Int -> TarIndexEntry -> ShowS
[TarIndexEntry] -> ShowS
TarIndexEntry -> String
(Int -> TarIndexEntry -> ShowS)
-> (TarIndexEntry -> String)
-> ([TarIndexEntry] -> ShowS)
-> Show TarIndexEntry
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TarIndexEntry] -> ShowS
$cshowList :: [TarIndexEntry] -> ShowS
show :: TarIndexEntry -> String
$cshow :: TarIndexEntry -> String
showsPrec :: Int -> TarIndexEntry -> ShowS
$cshowsPrec :: Int -> TarIndexEntry -> ShowS
Show, Typeable)


newtype PathComponentId = PathComponentId Int
  deriving (PathComponentId -> PathComponentId -> Bool
(PathComponentId -> PathComponentId -> Bool)
-> (PathComponentId -> PathComponentId -> Bool)
-> Eq PathComponentId
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PathComponentId -> PathComponentId -> Bool
$c/= :: PathComponentId -> PathComponentId -> Bool
== :: PathComponentId -> PathComponentId -> Bool
$c== :: PathComponentId -> PathComponentId -> Bool
Eq, Eq PathComponentId
Eq PathComponentId
-> (PathComponentId -> PathComponentId -> Ordering)
-> (PathComponentId -> PathComponentId -> Bool)
-> (PathComponentId -> PathComponentId -> Bool)
-> (PathComponentId -> PathComponentId -> Bool)
-> (PathComponentId -> PathComponentId -> Bool)
-> (PathComponentId -> PathComponentId -> PathComponentId)
-> (PathComponentId -> PathComponentId -> PathComponentId)
-> Ord PathComponentId
PathComponentId -> PathComponentId -> Bool
PathComponentId -> PathComponentId -> Ordering
PathComponentId -> PathComponentId -> PathComponentId
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: PathComponentId -> PathComponentId -> PathComponentId
$cmin :: PathComponentId -> PathComponentId -> PathComponentId
max :: PathComponentId -> PathComponentId -> PathComponentId
$cmax :: PathComponentId -> PathComponentId -> PathComponentId
>= :: PathComponentId -> PathComponentId -> Bool
$c>= :: PathComponentId -> PathComponentId -> Bool
> :: PathComponentId -> PathComponentId -> Bool
$c> :: PathComponentId -> PathComponentId -> Bool
<= :: PathComponentId -> PathComponentId -> Bool
$c<= :: PathComponentId -> PathComponentId -> Bool
< :: PathComponentId -> PathComponentId -> Bool
$c< :: PathComponentId -> PathComponentId -> Bool
compare :: PathComponentId -> PathComponentId -> Ordering
$ccompare :: PathComponentId -> PathComponentId -> Ordering
Ord, Int -> PathComponentId
PathComponentId -> Int
PathComponentId -> [PathComponentId]
PathComponentId -> PathComponentId
PathComponentId -> PathComponentId -> [PathComponentId]
PathComponentId
-> PathComponentId -> PathComponentId -> [PathComponentId]
(PathComponentId -> PathComponentId)
-> (PathComponentId -> PathComponentId)
-> (Int -> PathComponentId)
-> (PathComponentId -> Int)
-> (PathComponentId -> [PathComponentId])
-> (PathComponentId -> PathComponentId -> [PathComponentId])
-> (PathComponentId -> PathComponentId -> [PathComponentId])
-> (PathComponentId
    -> PathComponentId -> PathComponentId -> [PathComponentId])
-> Enum PathComponentId
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: PathComponentId
-> PathComponentId -> PathComponentId -> [PathComponentId]
$cenumFromThenTo :: PathComponentId
-> PathComponentId -> PathComponentId -> [PathComponentId]
enumFromTo :: PathComponentId -> PathComponentId -> [PathComponentId]
$cenumFromTo :: PathComponentId -> PathComponentId -> [PathComponentId]
enumFromThen :: PathComponentId -> PathComponentId -> [PathComponentId]
$cenumFromThen :: PathComponentId -> PathComponentId -> [PathComponentId]
enumFrom :: PathComponentId -> [PathComponentId]
$cenumFrom :: PathComponentId -> [PathComponentId]
fromEnum :: PathComponentId -> Int
$cfromEnum :: PathComponentId -> Int
toEnum :: Int -> PathComponentId
$ctoEnum :: Int -> PathComponentId
pred :: PathComponentId -> PathComponentId
$cpred :: PathComponentId -> PathComponentId
succ :: PathComponentId -> PathComponentId
$csucc :: PathComponentId -> PathComponentId
Enum, Int -> PathComponentId -> ShowS
[PathComponentId] -> ShowS
PathComponentId -> String
(Int -> PathComponentId -> ShowS)
-> (PathComponentId -> String)
-> ([PathComponentId] -> ShowS)
-> Show PathComponentId
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PathComponentId] -> ShowS
$cshowList :: [PathComponentId] -> ShowS
show :: PathComponentId -> String
$cshow :: PathComponentId -> String
showsPrec :: Int -> PathComponentId -> ShowS
$cshowsPrec :: Int -> PathComponentId -> ShowS
Show, Typeable)

-- | An offset within a tar file. Use 'hReadEntry', 'hReadEntryHeader' or
-- 'hSeekEntryOffset'.
--
-- This is actually a tar \"record\" number, not a byte offset.
--
type TarEntryOffset = Word32


-- | Look up a given filepath in the 'TarIndex'. It may return a 'TarFileEntry'
-- containing the 'TarEntryOffset' of the file within the tar file, or if
-- the filepath identifies a directory then it returns a 'TarDir' containing
-- the list of files within that directory.
--
-- Given the 'TarEntryOffset' you can then use one of the I\/O operations:
--
-- * 'hReadEntry' to read the whole entry;
--
-- * 'hReadEntryHeader' to read just the file metadata (e.g. its length);
--
lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry
lookup :: TarIndex -> String -> Maybe TarIndexEntry
lookup (TarIndex StringTable PathComponentId
pathTable IntTrie PathComponentId Word32
pathTrie Word32
_) String
path = do
    [PathComponentId]
fpath  <- StringTable PathComponentId -> String -> Maybe [PathComponentId]
toComponentIds StringTable PathComponentId
pathTable String
path
    TrieLookup PathComponentId Word32
tentry <- IntTrie PathComponentId Word32
-> [PathComponentId] -> Maybe (TrieLookup PathComponentId Word32)
forall k v.
(Enum k, Enum v) =>
IntTrie k v -> [k] -> Maybe (TrieLookup k v)
IntTrie.lookup IntTrie PathComponentId Word32
pathTrie [PathComponentId]
fpath
    TarIndexEntry -> Maybe TarIndexEntry
forall (m :: * -> *) a. Monad m => a -> m a
return (TrieLookup PathComponentId Word32 -> TarIndexEntry
mkIndexEntry TrieLookup PathComponentId Word32
tentry)
  where
    mkIndexEntry :: TrieLookup PathComponentId Word32 -> TarIndexEntry
mkIndexEntry (IntTrie.Entry Word32
offset)        = Word32 -> TarIndexEntry
TarFileEntry Word32
offset
    mkIndexEntry (IntTrie.Completions Completions PathComponentId Word32
entries) =
      [(String, TarIndexEntry)] -> TarIndexEntry
TarDir [ (StringTable PathComponentId -> PathComponentId -> String
fromComponentId StringTable PathComponentId
pathTable PathComponentId
key, TrieLookup PathComponentId Word32 -> TarIndexEntry
mkIndexEntry TrieLookup PathComponentId Word32
entry)
             | (PathComponentId
key, TrieLookup PathComponentId Word32
entry) <- Completions PathComponentId Word32
entries ]


toComponentIds :: StringTable PathComponentId -> FilePath -> Maybe [PathComponentId]
toComponentIds :: StringTable PathComponentId -> String -> Maybe [PathComponentId]
toComponentIds StringTable PathComponentId
table =
    [PathComponentId] -> [ByteString] -> Maybe [PathComponentId]
lookupComponents []
  ([ByteString] -> Maybe [PathComponentId])
-> (String -> [ByteString]) -> String -> Maybe [PathComponentId]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= Char -> ByteString
BS.Char8.singleton Char
'.')
  ([ByteString] -> [ByteString])
-> (String -> [ByteString]) -> String -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
splitDirectories
  (ByteString -> [ByteString])
-> (String -> ByteString) -> String -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ByteString
BS.Char8.pack
  where
    lookupComponents :: [PathComponentId] -> [ByteString] -> Maybe [PathComponentId]
lookupComponents [PathComponentId]
cs' []     = [PathComponentId] -> Maybe [PathComponentId]
forall a. a -> Maybe a
Just ([PathComponentId] -> [PathComponentId]
forall a. [a] -> [a]
reverse [PathComponentId]
cs')
    lookupComponents [PathComponentId]
cs' (ByteString
c:[ByteString]
cs) = case StringTable PathComponentId -> ByteString -> Maybe PathComponentId
forall id. Enum id => StringTable id -> ByteString -> Maybe id
StringTable.lookup StringTable PathComponentId
table ByteString
c of
      Maybe PathComponentId
Nothing  -> Maybe [PathComponentId]
forall a. Maybe a
Nothing
      Just PathComponentId
cid -> [PathComponentId] -> [ByteString] -> Maybe [PathComponentId]
lookupComponents (PathComponentId
cidPathComponentId -> [PathComponentId] -> [PathComponentId]
forall a. a -> [a] -> [a]
:[PathComponentId]
cs') [ByteString]
cs

fromComponentId :: StringTable PathComponentId -> PathComponentId -> FilePath
fromComponentId :: StringTable PathComponentId -> PathComponentId -> String
fromComponentId StringTable PathComponentId
table = ByteString -> String
BS.Char8.unpack (ByteString -> String)
-> (PathComponentId -> ByteString) -> PathComponentId -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. StringTable PathComponentId -> PathComponentId -> ByteString
forall id. Enum id => StringTable id -> id -> ByteString
StringTable.index StringTable PathComponentId
table

-- | All the files in the index with their corresponding 'TarEntryOffset's.
--
-- Note that the files are in no special order. If you intend to read all or
-- most files then is is recommended to sort by the 'TarEntryOffset'.
--
toList :: TarIndex -> [(FilePath, TarEntryOffset)]
toList :: TarIndex -> [(String, Word32)]
toList (TarIndex StringTable PathComponentId
pathTable IntTrie PathComponentId Word32
pathTrie Word32
_) =
    [ (String
path, Word32
off)
    | ([PathComponentId]
cids, Word32
off) <- IntTrie PathComponentId Word32 -> [([PathComponentId], Word32)]
forall k v. (Enum k, Enum v) => IntTrie k v -> [([k], v)]
IntTrie.toList IntTrie PathComponentId Word32
pathTrie
    , let path :: String
path = [String] -> String
FilePath.joinPath ((PathComponentId -> String) -> [PathComponentId] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (StringTable PathComponentId -> PathComponentId -> String
fromComponentId StringTable PathComponentId
pathTable) [PathComponentId]
cids) ]


-- | Build a 'TarIndex' from a sequence of tar 'Entries'. The 'Entries' are
-- assumed to start at offset @0@ within a file.
--
build :: Entries e -> Either e TarIndex
build :: forall e. Entries e -> Either e TarIndex
build = IndexBuilder -> Entries e -> Either e TarIndex
forall {a}. IndexBuilder -> Entries a -> Either a TarIndex
go IndexBuilder
empty
  where
    go :: IndexBuilder -> Entries a -> Either a TarIndex
go !IndexBuilder
builder (Next Entry
e Entries a
es) = IndexBuilder -> Entries a -> Either a TarIndex
go (Entry -> IndexBuilder -> IndexBuilder
addNextEntry Entry
e IndexBuilder
builder) Entries a
es
    go !IndexBuilder
builder  Entries a
Done       = TarIndex -> Either a TarIndex
forall a b. b -> Either a b
Right (TarIndex -> Either a TarIndex) -> TarIndex -> Either a TarIndex
forall a b. (a -> b) -> a -> b
$! IndexBuilder -> TarIndex
finalise IndexBuilder
builder
    go !IndexBuilder
_       (Fail a
err)  = a -> Either a TarIndex
forall a b. a -> Either a b
Left a
err


-- $incremental-construction
-- If you need more control than 'build' then you can construct the index
-- in an acumulator style using the 'IndexBuilder' and operations.
--
-- Start with 'empty' and use 'addNextEntry' (or 'skipNextEntry') for
-- each 'Entry' in the tar file in order. Every entry must added or skipped in
-- order, otherwise the resulting 'TarIndex' will report the wrong
-- 'TarEntryOffset's. At the end use 'finalise' to get the 'TarIndex'.
--
-- For example, 'build' is simply:
--
-- > build = go empty
-- >   where
-- >     go !builder (Next e es) = go (addNextEntry e builder) es
-- >     go !builder  Done       = Right $! finalise builder
-- >     go !_       (Fail err)  = Left err


-- | The intermediate type used for incremental construction of a 'TarIndex'.
--
data IndexBuilder
   = IndexBuilder !(StringTableBuilder PathComponentId)
                  !(IntTrieBuilder PathComponentId TarEntryOffset)
   {-# UNPACK #-} !TarEntryOffset
  deriving (IndexBuilder -> IndexBuilder -> Bool
(IndexBuilder -> IndexBuilder -> Bool)
-> (IndexBuilder -> IndexBuilder -> Bool) -> Eq IndexBuilder
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: IndexBuilder -> IndexBuilder -> Bool
$c/= :: IndexBuilder -> IndexBuilder -> Bool
== :: IndexBuilder -> IndexBuilder -> Bool
$c== :: IndexBuilder -> IndexBuilder -> Bool
Eq, Int -> IndexBuilder -> ShowS
[IndexBuilder] -> ShowS
IndexBuilder -> String
(Int -> IndexBuilder -> ShowS)
-> (IndexBuilder -> String)
-> ([IndexBuilder] -> ShowS)
-> Show IndexBuilder
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [IndexBuilder] -> ShowS
$cshowList :: [IndexBuilder] -> ShowS
show :: IndexBuilder -> String
$cshow :: IndexBuilder -> String
showsPrec :: Int -> IndexBuilder -> ShowS
$cshowsPrec :: Int -> IndexBuilder -> ShowS
Show)

instance NFData IndexBuilder where
  rnf :: IndexBuilder -> ()
rnf (IndexBuilder StringTableBuilder PathComponentId
_ IntTrieBuilder PathComponentId Word32
_ Word32
_) = () -- fully strict by construction

-- | The initial empty 'IndexBuilder'.
--
empty :: IndexBuilder
empty :: IndexBuilder
empty = StringTableBuilder PathComponentId
-> IntTrieBuilder PathComponentId Word32 -> Word32 -> IndexBuilder
IndexBuilder StringTableBuilder PathComponentId
forall id. StringTableBuilder id
StringTable.empty IntTrieBuilder PathComponentId Word32
forall k v. IntTrieBuilder k v
IntTrie.empty Word32
0

emptyIndex :: IndexBuilder
emptyIndex :: IndexBuilder
emptyIndex = IndexBuilder
empty
{-# DEPRECATED emptyIndex "Use TarIndex.empty" #-}

-- | Add the next 'Entry' into the 'IndexBuilder'.
--
addNextEntry :: Entry -> IndexBuilder -> IndexBuilder
addNextEntry :: Entry -> IndexBuilder -> IndexBuilder
addNextEntry Entry
entry (IndexBuilder StringTableBuilder PathComponentId
stbl IntTrieBuilder PathComponentId Word32
itrie Word32
nextOffset) =
    StringTableBuilder PathComponentId
-> IntTrieBuilder PathComponentId Word32 -> Word32 -> IndexBuilder
IndexBuilder StringTableBuilder PathComponentId
stbl' IntTrieBuilder PathComponentId Word32
itrie'
                 (Entry -> Word32 -> Word32
nextEntryOffset Entry
entry Word32
nextOffset)
  where
    !entrypath :: [ByteString]
entrypath    = TarPath -> [ByteString]
splitTarPath (Entry -> TarPath
entryTarPath Entry
entry)
    (StringTableBuilder PathComponentId
stbl', [PathComponentId]
cids) = [ByteString]
-> StringTableBuilder PathComponentId
-> (StringTableBuilder PathComponentId, [PathComponentId])
forall id.
Enum id =>
[ByteString]
-> StringTableBuilder id -> (StringTableBuilder id, [id])
StringTable.inserts [ByteString]
entrypath StringTableBuilder PathComponentId
stbl
    itrie' :: IntTrieBuilder PathComponentId Word32
itrie'        = [PathComponentId]
-> Word32
-> IntTrieBuilder PathComponentId Word32
-> IntTrieBuilder PathComponentId Word32
forall k v.
(Enum k, Enum v) =>
[k] -> v -> IntTrieBuilder k v -> IntTrieBuilder k v
IntTrie.insert [PathComponentId]
cids Word32
nextOffset IntTrieBuilder PathComponentId Word32
itrie

-- | Use this function if you want to skip some entries and not add them to the
-- final 'TarIndex'.
--
skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder
skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder
skipNextEntry Entry
entry (IndexBuilder StringTableBuilder PathComponentId
stbl IntTrieBuilder PathComponentId Word32
itrie Word32
nextOffset) =
    StringTableBuilder PathComponentId
-> IntTrieBuilder PathComponentId Word32 -> Word32 -> IndexBuilder
IndexBuilder StringTableBuilder PathComponentId
stbl IntTrieBuilder PathComponentId Word32
itrie (Entry -> Word32 -> Word32
nextEntryOffset Entry
entry Word32
nextOffset)

-- | Finish accumulating 'Entry' information and build the compact 'TarIndex'
-- lookup structure.
--
finalise :: IndexBuilder -> TarIndex
finalise :: IndexBuilder -> TarIndex
finalise (IndexBuilder StringTableBuilder PathComponentId
stbl IntTrieBuilder PathComponentId Word32
itrie Word32
finalOffset) =
    StringTable PathComponentId
-> IntTrie PathComponentId Word32 -> Word32 -> TarIndex
TarIndex StringTable PathComponentId
pathTable IntTrie PathComponentId Word32
pathTrie Word32
finalOffset
  where
    pathTable :: StringTable PathComponentId
pathTable = StringTableBuilder PathComponentId -> StringTable PathComponentId
forall id. Enum id => StringTableBuilder id -> StringTable id
StringTable.finalise StringTableBuilder PathComponentId
stbl
    pathTrie :: IntTrie PathComponentId Word32
pathTrie  = IntTrieBuilder PathComponentId Word32
-> IntTrie PathComponentId Word32
forall k v. IntTrieBuilder k v -> IntTrie k v
IntTrie.finalise IntTrieBuilder PathComponentId Word32
itrie

finaliseIndex :: IndexBuilder -> TarIndex
finaliseIndex :: IndexBuilder -> TarIndex
finaliseIndex = IndexBuilder -> TarIndex
finalise
{-# DEPRECATED finaliseIndex "Use TarIndex.finalise" #-}

-- | This is the offset immediately following the entry most recently added
-- to the 'IndexBuilder'. You might use this if you need to know the offsets
-- but don't want to use the 'TarIndex' lookup structure.
-- Use with 'hSeekEntryOffset'. See also 'nextEntryOffset'.
--
indexNextEntryOffset :: IndexBuilder -> TarEntryOffset
indexNextEntryOffset :: IndexBuilder -> Word32
indexNextEntryOffset (IndexBuilder StringTableBuilder PathComponentId
_ IntTrieBuilder PathComponentId Word32
_ Word32
off) = Word32
off

-- | This is the offset immediately following the last entry in the tar file.
-- This can be useful to append further entries into the tar file.
-- Use with 'hSeekEntryOffset', or just use 'hSeekEndEntryOffset' directly.
--
indexEndEntryOffset :: TarIndex -> TarEntryOffset
indexEndEntryOffset :: TarIndex -> Word32
indexEndEntryOffset (TarIndex StringTable PathComponentId
_ IntTrie PathComponentId Word32
_ Word32
off) = Word32
off

-- | Calculate the 'TarEntryOffset' of the next entry, given the size and
-- offset of the current entry.
--
-- This is much like using 'skipNextEntry' and 'indexNextEntryOffset', but without
-- using an 'IndexBuilder'.
--
nextEntryOffset :: Entry -> TarEntryOffset -> TarEntryOffset
nextEntryOffset :: Entry -> Word32 -> Word32
nextEntryOffset Entry
entry Word32
offset =
    Word32
offset
  Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
1
  Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ case Entry -> EntryContent
entryContent Entry
entry of
      NormalFile     ByteString
_   FileSize
size -> FileSize -> Word32
blocks FileSize
size
      OtherEntryType Char
_ ByteString
_ FileSize
size -> FileSize -> Word32
blocks FileSize
size
      EntryContent
_                       -> Word32
0
  where
    -- NOTE: to avoid underflow, do the (fromIntegral :: Int64 -> Word32) last
    blocks :: Int64 -> TarEntryOffset
    blocks :: FileSize -> Word32
blocks FileSize
size = FileSize -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (FileSize
1 FileSize -> FileSize -> FileSize
forall a. Num a => a -> a -> a
+ (FileSize
size FileSize -> FileSize -> FileSize
forall a. Num a => a -> a -> a
- FileSize
1) FileSize -> FileSize -> FileSize
forall a. Integral a => a -> a -> a
`div` FileSize
512)

type FilePathBS = BS.ByteString

splitTarPath :: TarPath -> [FilePathBS]
splitTarPath :: TarPath -> [ByteString]
splitTarPath (TarPath ByteString
name ByteString
prefix) =
    ByteString -> [ByteString]
splitDirectories ByteString
prefix [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ ByteString -> [ByteString]
splitDirectories ByteString
name

splitDirectories :: FilePathBS -> [FilePathBS]
splitDirectories :: ByteString -> [ByteString]
splitDirectories ByteString
bs =
    case Char -> ByteString -> [ByteString]
BS.Char8.split Char
'/' ByteString
bs of
      ByteString
c:[ByteString]
cs | ByteString -> Bool
BS.null ByteString
c -> Char -> ByteString
BS.Char8.singleton Char
'/' ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (ByteString -> Bool) -> ByteString -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Bool
BS.null) [ByteString]
cs
      [ByteString]
cs               ->                          (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (ByteString -> Bool) -> ByteString -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Bool
BS.null) [ByteString]
cs


-------------------------
-- Resume building an existing index
--

-- | Resume building an existing index
--
-- A 'TarIndex' is optimized for a highly compact and efficient in-memory
-- representation. This, however, makes it read-only. If you have an existing
-- 'TarIndex' for a large file, and want to add to it, you can translate the
-- 'TarIndex' back to an 'IndexBuilder'. Be aware that this is a relatively
-- costly operation (linear in the size of the 'TarIndex'), though still
-- faster than starting again from scratch.
--
-- This is the left inverse to 'finalise' (modulo ordering).
--
unfinalise :: TarIndex -> IndexBuilder
unfinalise :: TarIndex -> IndexBuilder
unfinalise (TarIndex StringTable PathComponentId
pathTable IntTrie PathComponentId Word32
pathTrie Word32
finalOffset) =
    StringTableBuilder PathComponentId
-> IntTrieBuilder PathComponentId Word32 -> Word32 -> IndexBuilder
IndexBuilder (StringTable PathComponentId -> StringTableBuilder PathComponentId
forall id. Enum id => StringTable id -> StringTableBuilder id
StringTable.unfinalise StringTable PathComponentId
pathTable)
                 (IntTrie PathComponentId Word32
-> IntTrieBuilder PathComponentId Word32
forall k v. (Enum k, Enum v) => IntTrie k v -> IntTrieBuilder k v
IntTrie.unfinalise IntTrie PathComponentId Word32
pathTrie)
                 Word32
finalOffset


-------------------------
-- I/O operations
--

-- | Reads an entire 'Entry' at the given 'TarEntryOffset' in the tar file.
-- The 'Handle' must be open for reading and be seekable.
--
-- This reads the whole entry into memory strictly, not incrementally. For more
-- control, use 'hReadEntryHeader' and then read the entry content manually.
--
hReadEntry :: Handle -> TarEntryOffset -> IO Entry
hReadEntry :: Handle -> Word32 -> IO Entry
hReadEntry Handle
hnd Word32
off = do
    Entry
entry <- Handle -> Word32 -> IO Entry
hReadEntryHeader Handle
hnd Word32
off
    case Entry -> EntryContent
entryContent Entry
entry of
      NormalFile       ByteString
_ FileSize
size -> do ByteString
body <- Handle -> Int -> IO ByteString
LBS.hGet Handle
hnd (FileSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral FileSize
size)
                                    Entry -> IO Entry
forall (m :: * -> *) a. Monad m => a -> m a
return Entry
entry {
                                      entryContent :: EntryContent
entryContent = ByteString -> FileSize -> EntryContent
NormalFile ByteString
body FileSize
size
                                    }
      OtherEntryType Char
c ByteString
_ FileSize
size -> do ByteString
body <- Handle -> Int -> IO ByteString
LBS.hGet Handle
hnd (FileSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral FileSize
size)
                                    Entry -> IO Entry
forall (m :: * -> *) a. Monad m => a -> m a
return Entry
entry {
                                      entryContent :: EntryContent
entryContent = Char -> ByteString -> FileSize -> EntryContent
OtherEntryType Char
c ByteString
body FileSize
size
                                    }
      EntryContent
_                       -> Entry -> IO Entry
forall (m :: * -> *) a. Monad m => a -> m a
return Entry
entry

-- | Read the header for a 'Entry' at the given 'TarEntryOffset' in the tar
-- file. The 'entryContent' will contain the correct metadata but an empty file
-- content. The 'Handle' must be open for reading and be seekable.
--
-- The 'Handle' position is advanced to the beginning of the entry content (if
-- any). You must check the 'entryContent' to see if the entry is of type
-- 'NormalFile'. If it is, the 'NormalFile' gives the content length and you
-- are free to read this much data from the 'Handle'.
--
-- > entry <- Tar.hReadEntryHeader hnd
-- > case Tar.entryContent entry of
-- >   Tar.NormalFile _ size -> do content <- BS.hGet hnd size
-- >                               ...
--
-- Of course you don't have to read it all in one go (as 'hReadEntry' does),
-- you can use any appropriate method to read it incrementally.
--
-- In addition to I\/O errors, this can throw a 'FormatError' if the offset is
-- wrong, or if the file is not valid tar format.
--
-- There is also the lower level operation 'hSeekEntryOffset'.
--
hReadEntryHeader :: Handle -> TarEntryOffset -> IO Entry
hReadEntryHeader :: Handle -> Word32 -> IO Entry
hReadEntryHeader Handle
hnd Word32
blockOff = do
    Handle -> Word32 -> IO ()
hSeekEntryOffset Handle
hnd Word32
blockOff
    ByteString
header <- Handle -> Int -> IO ByteString
LBS.hGet Handle
hnd Int
512
    case ByteString -> Entries FormatError
Tar.read ByteString
header of
      Tar.Next Entry
entry Entries FormatError
_ -> Entry -> IO Entry
forall (m :: * -> *) a. Monad m => a -> m a
return Entry
entry
      Tar.Fail FormatError
e       -> FormatError -> IO Entry
forall e a. Exception e => e -> IO a
throwIO FormatError
e
      Entries FormatError
Tar.Done         -> String -> IO Entry
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"hReadEntryHeader: impossible"

-- | Set the 'Handle' position to the position corresponding to the given
-- 'TarEntryOffset'.
--
-- This position is where the entry metadata can be read. If you already know
-- the entry has a body (and perhaps know it's length), you may wish to seek to
-- the body content directly using 'hSeekEntryContentOffset'.
--
hSeekEntryOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEntryOffset :: Handle -> Word32 -> IO ()
hSeekEntryOffset Handle
hnd Word32
blockOff =
    Handle -> SeekMode -> Integer -> IO ()
hSeek Handle
hnd SeekMode
AbsoluteSeek (Word32 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
blockOff Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
512)

-- | Set the 'Handle' position to the entry content position corresponding to
-- the given 'TarEntryOffset'.
--
-- This position is where the entry content can be read using ordinary I\/O
-- operations (though you have to know in advance how big the entry content
-- is). This is /only valid/ if you /already know/ the entry has a body (i.e.
-- is a normal file).
--
hSeekEntryContentOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEntryContentOffset :: Handle -> Word32 -> IO ()
hSeekEntryContentOffset Handle
hnd Word32
blockOff =
    Handle -> Word32 -> IO ()
hSeekEntryOffset Handle
hnd (Word32
blockOff Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
1)

-- | This is a low level variant on 'hReadEntryHeader', that can be used to
-- iterate through a tar file, entry by entry.
--
-- It has a few differences compared to 'hReadEntryHeader':
--
-- * It returns an indication when the end of the tar file is reached.
--
-- * It /does not/ move the 'Handle' position to the beginning of the entry
--   content.
--
-- * It returns the 'TarEntryOffset' of the next entry.
--
-- After this action, the 'Handle' position is not in any useful place. If
-- you want to skip to the next entry, take the 'TarEntryOffset' returned and
-- use 'hReadEntryHeaderOrEof' again. Or if having inspected the 'Entry'
-- header you want to read the entry content (if it has one) then use
-- 'hSeekEntryContentOffset' on the original input 'TarEntryOffset'.
--
hReadEntryHeaderOrEof :: Handle -> TarEntryOffset
                      -> IO (Maybe (Entry, TarEntryOffset))
hReadEntryHeaderOrEof :: Handle -> Word32 -> IO (Maybe (Entry, Word32))
hReadEntryHeaderOrEof Handle
hnd Word32
blockOff = do
    Handle -> Word32 -> IO ()
hSeekEntryOffset Handle
hnd Word32
blockOff
    ByteString
header <- Handle -> Int -> IO ByteString
LBS.hGet Handle
hnd Int
1024
    case ByteString -> Entries FormatError
Tar.read ByteString
header of
      Tar.Next Entry
entry Entries FormatError
_ -> let !blockOff' :: Word32
blockOff' = Entry -> Word32 -> Word32
nextEntryOffset Entry
entry Word32
blockOff
                           in Maybe (Entry, Word32) -> IO (Maybe (Entry, Word32))
forall (m :: * -> *) a. Monad m => a -> m a
return ((Entry, Word32) -> Maybe (Entry, Word32)
forall a. a -> Maybe a
Just (Entry
entry, Word32
blockOff'))
      Entries FormatError
Tar.Done         -> Maybe (Entry, Word32) -> IO (Maybe (Entry, Word32))
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (Entry, Word32)
forall a. Maybe a
Nothing
      Tar.Fail FormatError
e       -> FormatError -> IO (Maybe (Entry, Word32))
forall e a. Exception e => e -> IO a
throwIO FormatError
e

-- | Seek to the end of a tar file, to the position where new entries can
-- be appended, and return that 'TarEntryOffset'.
--
-- If you have a valid 'TarIndex' for this tar file then you should supply it
-- because it allows seeking directly to the correct location.
--
-- If you do not have an index, then this becomes an expensive linear
-- operation because we have to read each tar entry header from the beginning
-- to find the location immediately after the last entry (this is because tar
-- files have a variable length trailer and we cannot reliably find that by
-- starting at the end). In this mode, it will fail with an exception if the
-- file is not in fact in the tar format.
--
hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO TarEntryOffset
hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO Word32
hSeekEndEntryOffset Handle
hnd (Just TarIndex
index) = do
    let offset :: Word32
offset = TarIndex -> Word32
indexEndEntryOffset TarIndex
index
    Handle -> Word32 -> IO ()
hSeekEntryOffset Handle
hnd Word32
offset
    Word32 -> IO Word32
forall (m :: * -> *) a. Monad m => a -> m a
return Word32
offset

hSeekEndEntryOffset Handle
hnd Maybe TarIndex
Nothing = do
    Integer
size <- Handle -> IO Integer
hFileSize Handle
hnd
    if Integer
size Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0
      then Word32 -> IO Word32
forall (m :: * -> *) a. Monad m => a -> m a
return Word32
0
      else Word32 -> IO Word32
seekToEnd Word32
0
  where
    seekToEnd :: Word32 -> IO Word32
seekToEnd Word32
offset = do
      Maybe (Entry, Word32)
mbe <- Handle -> Word32 -> IO (Maybe (Entry, Word32))
hReadEntryHeaderOrEof Handle
hnd Word32
offset
      case Maybe (Entry, Word32)
mbe of
        Maybe (Entry, Word32)
Nothing -> do Handle -> Word32 -> IO ()
hSeekEntryOffset Handle
hnd Word32
offset
                      Word32 -> IO Word32
forall (m :: * -> *) a. Monad m => a -> m a
return Word32
offset
        Just (Entry
_, Word32
offset') -> Word32 -> IO Word32
seekToEnd Word32
offset'

-------------------------
-- (de)serialisation
--

-- | The 'TarIndex' is compact in memory, and it has a similarly compact
-- external representation.
--
serialise :: TarIndex -> BS.ByteString
serialise :: TarIndex -> ByteString
serialise = ByteString -> ByteString
toStrict (ByteString -> ByteString)
-> (TarIndex -> ByteString) -> TarIndex -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TarIndex -> ByteString
serialiseLBS

-- we keep this version around just so we can check we got the size right.
serialiseLBS :: TarIndex -> LBS.ByteString
serialiseLBS :: TarIndex -> ByteString
serialiseLBS TarIndex
index =
    AllocationStrategy -> ByteString -> Builder -> ByteString
BS.toLazyByteStringWith
      (Int -> Int -> AllocationStrategy
BS.untrimmedStrategy (TarIndex -> Int
serialiseSize TarIndex
index) Int
512) ByteString
LBS.empty
      (TarIndex -> Builder
serialiseBuilder TarIndex
index)

serialiseSize :: TarIndex -> Int
serialiseSize :: TarIndex -> Int
serialiseSize (TarIndex StringTable PathComponentId
stringTable IntTrie PathComponentId Word32
intTrie Word32
_) =
    StringTable PathComponentId -> Int
forall id. StringTable id -> Int
StringTable.serialiseSize StringTable PathComponentId
stringTable
  Int -> Int -> Int
forall a. Num a => a -> a -> a
+ IntTrie PathComponentId Word32 -> Int
forall k v. IntTrie k v -> Int
IntTrie.serialiseSize IntTrie PathComponentId Word32
intTrie
  Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8

serialiseBuilder :: TarIndex -> BS.Builder
serialiseBuilder :: TarIndex -> Builder
serialiseBuilder (TarIndex StringTable PathComponentId
stringTable IntTrie PathComponentId Word32
intTrie Word32
finalOffset) =
     Word32 -> Builder
BS.word32BE Word32
2 -- format version
  Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word32 -> Builder
BS.word32BE Word32
finalOffset
  Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> StringTable PathComponentId -> Builder
forall id. StringTable id -> Builder
StringTable.serialise StringTable PathComponentId
stringTable
  Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> IntTrie PathComponentId Word32 -> Builder
forall k v. IntTrie k v -> Builder
IntTrie.serialise IntTrie PathComponentId Word32
intTrie

-- | Read the external representation back into a 'TarIndex'.
--
deserialise :: BS.ByteString -> Maybe (TarIndex, BS.ByteString)
deserialise :: ByteString -> Maybe (TarIndex, ByteString)
deserialise ByteString
bs
  | ByteString -> Int
BS.length ByteString
bs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
8
  = Maybe (TarIndex, ByteString)
forall a. Maybe a
Nothing

  | let ver :: Word32
ver = ByteString -> Int -> Word32
readWord32BE ByteString
bs Int
0
  , Word32
ver Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
1
  = do let !finalOffset :: Word32
finalOffset = ByteString -> Int -> Word32
readWord32BE ByteString
bs Int
4
       (StringTable PathComponentId
stringTable, ByteString
bs')  <- ByteString -> Maybe (StringTable PathComponentId, ByteString)
forall id. ByteString -> Maybe (StringTable id, ByteString)
StringTable.deserialiseV1 (Int -> ByteString -> ByteString
BS.drop Int
8 ByteString
bs)
       (IntTrie PathComponentId Word32
intTrie,     ByteString
bs'') <- ByteString -> Maybe (IntTrie PathComponentId Word32, ByteString)
forall k v. ByteString -> Maybe (IntTrie k v, ByteString)
IntTrie.deserialise ByteString
bs'
       (TarIndex, ByteString) -> Maybe (TarIndex, ByteString)
forall (m :: * -> *) a. Monad m => a -> m a
return (StringTable PathComponentId
-> IntTrie PathComponentId Word32 -> Word32 -> TarIndex
TarIndex StringTable PathComponentId
stringTable IntTrie PathComponentId Word32
intTrie Word32
finalOffset, ByteString
bs'')

  | let ver :: Word32
ver = ByteString -> Int -> Word32
readWord32BE ByteString
bs Int
0
  , Word32
ver Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
2
  = do let !finalOffset :: Word32
finalOffset = ByteString -> Int -> Word32
readWord32BE ByteString
bs Int
4
       (StringTable PathComponentId
stringTable, ByteString
bs')  <- ByteString -> Maybe (StringTable PathComponentId, ByteString)
forall id. ByteString -> Maybe (StringTable id, ByteString)
StringTable.deserialiseV2 (Int -> ByteString -> ByteString
BS.drop Int
8 ByteString
bs)
       (IntTrie PathComponentId Word32
intTrie,     ByteString
bs'') <- ByteString -> Maybe (IntTrie PathComponentId Word32, ByteString)
forall k v. ByteString -> Maybe (IntTrie k v, ByteString)
IntTrie.deserialise ByteString
bs'
       (TarIndex, ByteString) -> Maybe (TarIndex, ByteString)
forall (m :: * -> *) a. Monad m => a -> m a
return (StringTable PathComponentId
-> IntTrie PathComponentId Word32 -> Word32 -> TarIndex
TarIndex StringTable PathComponentId
stringTable IntTrie PathComponentId Word32
intTrie Word32
finalOffset, ByteString
bs'')

  | Bool
otherwise = Maybe (TarIndex, ByteString)
forall a. Maybe a
Nothing

readWord32BE :: BS.ByteString -> Int -> Word32
readWord32BE :: ByteString -> Int -> Word32
readWord32BE ByteString
bs Int
i =
    Bool -> Word32 -> Word32
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= ByteString -> Int
BS.length ByteString
bs Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$
    Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0)) Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
24
  Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)) Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
16
  Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)) Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
8
  Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3))


-------------------------
-- Test properties
--

#ifdef TESTS

-- Not quite the properties of a finite mapping because we also have lookups
-- that result in completions.

prop_lookup :: ValidPaths -> NonEmptyFilePath -> Bool
prop_lookup (ValidPaths paths) (NonEmptyFilePath p) =
  case (lookup index p, Prelude.lookup p paths) of
    (Nothing,                    Nothing)          -> True
    (Just (TarFileEntry offset), Just (_,offset')) -> offset == offset'
    (Just (TarDir entries),      Nothing)          -> sort (nub (map fst entries))
                                                   == sort (nub completions)
    _                                              -> False
  where
    index       = construct paths
    completions = [ head (FilePath.splitDirectories completion)
                  | (path,_) <- paths
                  , completion <- maybeToList $ stripPrefix (p ++ "/") path ]

prop_toList :: ValidPaths -> Bool
prop_toList (ValidPaths paths) =
    sort (toList index)
 == sort [ (path, off) | (path, (_sz, off)) <- paths ]
  where
    index = construct paths

prop_valid :: ValidPaths -> Bool
prop_valid (ValidPaths paths)
  | not $ StringTable.prop_valid   pathbits = error "TarIndex: bad string table"
  | not $ IntTrie.prop_lookup      intpaths = error "TarIndex: bad int trie"
  | not $ IntTrie.prop_completions intpaths = error "TarIndex: bad int trie"
  | not $ prop'                             = error "TarIndex: bad prop"
  | otherwise                               = True

  where
    index@(TarIndex pathTable _ _) = construct paths

    pathbits = concatMap (map BS.Char8.pack . FilePath.splitDirectories . fst)
                         paths
    intpaths = [ (cids, offset)
               | (path, (_size, offset)) <- paths
               , let Just cids = toComponentIds pathTable path ]
    prop' = flip all paths $ \(file, (_size, offset)) ->
      case lookup index file of
        Just (TarFileEntry offset') -> offset' == offset
        _                           -> False

prop_serialise_deserialise :: ValidPaths -> Bool
prop_serialise_deserialise (ValidPaths paths) =
    Just (index, BS.empty) == (deserialise . serialise) index
  where
    index = construct paths

prop_serialiseSize :: ValidPaths -> Bool
prop_serialiseSize (ValidPaths paths) =
    case (LBS.toChunks . serialiseLBS) index of
      [c1] -> BS.length c1 == serialiseSize index
      _    -> False
  where
    index = construct paths

newtype NonEmptyFilePath = NonEmptyFilePath FilePath deriving Show

instance Arbitrary NonEmptyFilePath where
  arbitrary = NonEmptyFilePath . FilePath.joinPath
                <$> listOf1 (elements ["a", "b", "c", "d"])

newtype ValidPaths = ValidPaths [(FilePath, (Int64, TarEntryOffset))] deriving Show

instance Arbitrary ValidPaths where
  arbitrary = do
      paths <- makeNoPrefix <$> listOf arbitraryPath
      sizes <- vectorOf (length paths) (getNonNegative <$> arbitrary)
      let offsets = scanl (\o sz -> o + 1 + blocks sz) 0 sizes
      return (ValidPaths (zip paths (zip sizes offsets)))
    where
      arbitraryPath   = FilePath.joinPath
                         <$> listOf1 (elements ["a", "b", "c", "d"])
      makeNoPrefix [] = []
      makeNoPrefix (k:ks)
        | all (not . isPrefixOfOther k) ks
                     = k : makeNoPrefix ks
        | otherwise  =     makeNoPrefix ks

      isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a

      blocks :: Int64 -> TarEntryOffset
      blocks size = fromIntegral (1 + ((size - 1) `div` 512))

-- Helper for bulk construction.
construct :: [(FilePath, (Int64, TarEntryOffset))] -> TarIndex
construct =
    either (\_ -> undefined) id
  . build
  . foldr (\(path, (size, _off)) es -> Next (testEntry path size) es) Done

example0 :: Entries ()
example0 =
         testEntry "foo-1.0/foo-1.0.cabal" 1500 -- at block 0
  `Next` testEntry "foo-1.0/LICENSE"       2000 -- at block 4
  `Next` testEntry "foo-1.0/Data/Foo.hs"   1000 -- at block 9
  `Next` Done

example1 :: Entries ()
example1 =
  Next (testEntry "./" 1500) Done <> example0

testEntry :: FilePath -> Int64 -> Entry
testEntry name size = simpleEntry path (NormalFile mempty size)
  where
    Right path = toTarPath False name

-- | Simple tar archive containing regular files only
data SimpleTarArchive = SimpleTarArchive {
    simpleTarEntries :: Tar.Entries ()
  , simpleTarRaw     :: [(FilePath, LBS.ByteString)]
  , simpleTarBS      :: LBS.ByteString
  }

instance Show SimpleTarArchive where
  show = show . simpleTarRaw

prop_index_matches_tar :: SimpleTarArchive -> Property
prop_index_matches_tar sta =
    ioProperty (try go >>= either (\e -> throwIO (e :: SomeException))
                                  (\_ -> return True))
  where
    go :: IO ()
    go = do
      h <- HBS.readHandle True (simpleTarBS sta)
      goEntries h 0 (simpleTarEntries sta)

    goEntries :: Handle -> TarEntryOffset -> Tar.Entries () -> IO ()
    goEntries _ _ Tar.Done =
      return ()
    goEntries _ _ (Tar.Fail _) =
      throwIO (userError "Fail entry in SimpleTarArchive")
    goEntries h offset (Tar.Next e es) = do
      goEntry h offset e
      goEntries h (nextEntryOffset e offset) es

    goEntry :: Handle -> TarEntryOffset -> Tar.Entry -> IO ()
    goEntry h offset e = do
      e' <- hReadEntry h offset
      case (Tar.entryContent e, Tar.entryContent e') of
        (Tar.NormalFile bs sz, Tar.NormalFile bs' sz') ->
          unless (sz == sz' && bs == bs') $
            throwIO $ userError "Entry mismatch"
        _otherwise ->
          throwIO $ userError "unexpected entry types"

instance Arbitrary SimpleTarArchive where
  arbitrary = do
      numEntries <- sized $ \n -> choose (0, n)
      rawEntries <- mkRaw numEntries
      let entries = mkList rawEntries
      return SimpleTarArchive {
          simpleTarEntries = mkEntries entries
        , simpleTarRaw     = rawEntries
        , simpleTarBS      = Tar.write entries
        }
    where
      mkRaw :: Int -> Gen [(FilePath, LBS.ByteString)]
      mkRaw 0 = return []
      mkRaw n = do
         -- Pick a size around 0, 1, or 2 block boundaries
         sz <- sized $ \n -> elements (take n fileSizes)
         bs <- LBS.pack `fmap` vectorOf sz arbitrary
         es <- mkRaw (n - 1)
         return $ ("file" ++ show n, bs) : es

      mkList :: [(FilePath, LBS.ByteString)] -> [Tar.Entry]
      mkList []            = []
      mkList ((fp, bs):es) = entry : mkList es
        where
          Right path = toTarPath False fp
          entry   = simpleEntry path content
          content = NormalFile bs (LBS.length bs)

      mkEntries :: [Tar.Entry] -> Tar.Entries ()
      mkEntries []     = Tar.Done
      mkEntries (e:es) = Tar.Next e (mkEntries es)

      -- Sizes around 0, 1, and 2 block boundaries
      fileSizes :: [Int]
      fileSizes = [
                           0 ,    1 ,    2
        ,  510 ,  511 ,  512 ,  513 ,  514
        , 1022 , 1023 , 1024 , 1025 , 1026
        ]

-- | 'IndexBuilder' constructed from a 'SimpleIndex'
newtype SimpleIndexBuilder = SimpleIndexBuilder IndexBuilder
  deriving Show

instance Arbitrary SimpleIndexBuilder where
  arbitrary = SimpleIndexBuilder . build' . simpleTarEntries <$> arbitrary
    where
      -- like 'build', but don't finalize
      build' :: Show e => Entries e -> IndexBuilder
      build' = go empty
        where
          go !builder (Next e es) = go (addNextEntry e builder) es
          go !builder  Done       = builder
          go !_       (Fail err)  = error (show err)

prop_finalise_unfinalise :: SimpleIndexBuilder -> Bool
prop_finalise_unfinalise (SimpleIndexBuilder index) =
    unfinalise (finalise index) == index

#endif

toStrict :: LBS.ByteString -> BS.ByteString
#if MIN_VERSION_bytestring(0,10,0)
toStrict :: ByteString -> ByteString
toStrict = ByteString -> ByteString
LBS.toStrict
#else
toStrict = BS.concat . LBS.toChunks
#endif

#if !(MIN_VERSION_base(4,5,0))
(<>) :: Monoid m => m -> m -> m
(<>) = mappend
#endif