{-# LANGUAGE CPP #-}

module TextBuilderDev.Allocator
  ( -- * Execution
    allocate,

    -- * Definition
    Allocator,
    force,
    text,
    asciiByteString,
    char,
    unicodeCodePoint,
    utf8CodeUnits1,
    utf8CodeUnits2,
    utf8CodeUnits3,
    utf8CodeUnits4,
    utf16CodeUnits1,
    utf16CodeUnits2,
  )
where

import qualified Data.ByteString as ByteString
import qualified Data.Text as Text
import qualified Data.Text.Array as TextArray
import qualified Data.Text.IO as Text
import qualified Data.Text.Internal as TextInternal
import qualified Data.Text.Lazy as TextLazy
import qualified Data.Text.Lazy.Builder as TextLazyBuilder
import TextBuilderDev.Prelude
import qualified TextBuilderDev.Utf16View as Utf16View
import qualified TextBuilderDev.Utf8View as Utf8View

-- * ArrayWriter

newtype ArrayWriter
  = ArrayWriter (forall s. TextArray.MArray s -> Int -> ST s Int)

instance Semigroup ArrayWriter where
  {-# INLINE (<>) #-}
  ArrayWriter forall s. MArray s -> Int -> ST s Int
writeL <> :: ArrayWriter -> ArrayWriter -> ArrayWriter
<> ArrayWriter forall s. MArray s -> Int -> ST s Int
writeR =
    (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter ((forall s. MArray s -> Int -> ST s Int) -> ArrayWriter)
-> (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
forall a b. (a -> b) -> a -> b
$ \MArray s
array Int
offset -> do
      Int
offsetAfter1 <- MArray s -> Int -> ST s Int
forall s. MArray s -> Int -> ST s Int
writeL MArray s
array Int
offset
      MArray s -> Int -> ST s Int
forall s. MArray s -> Int -> ST s Int
writeR MArray s
array Int
offsetAfter1

instance Monoid ArrayWriter where
  {-# INLINE mempty #-}
  mempty :: ArrayWriter
mempty = (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter ((forall s. MArray s -> Int -> ST s Int) -> ArrayWriter)
-> (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
forall a b. (a -> b) -> a -> b
$ (Int -> ST s Int) -> MArray s -> Int -> ST s Int
forall a b. a -> b -> a
const ((Int -> ST s Int) -> MArray s -> Int -> ST s Int)
-> (Int -> ST s Int) -> MArray s -> Int -> ST s Int
forall a b. (a -> b) -> a -> b
$ Int -> ST s Int
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return

-- * Allocator

-- | Execute a builder producing a strict text.
allocate :: Allocator -> Text
allocate :: Allocator -> Text
allocate (Allocator (ArrayWriter forall s. MArray s -> Int -> ST s Int
write) Int
sizeBound) =
  (forall s. ST s Text) -> Text
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s Text) -> Text) -> (forall s. ST s Text) -> Text
forall a b. (a -> b) -> a -> b
$ do
    MArray s
array <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
TextArray.new Int
sizeBound
    Int
offsetAfter <- MArray s -> Int -> ST s Int
forall s. MArray s -> Int -> ST s Int
write MArray s
array Int
0
    Array
frozenArray <- MArray s -> ST s Array
forall s. MArray s -> ST s Array
TextArray.unsafeFreeze MArray s
array
    Text -> ST s Text
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ST s Text) -> Text -> ST s Text
forall a b. (a -> b) -> a -> b
$ Array -> Int -> Int -> Text
TextInternal.text Array
frozenArray Int
0 Int
offsetAfter

-- |
-- Specification of how to efficiently construct strict 'Text'.
-- Provides instances of 'Semigroup' and 'Monoid', which have complexity of /O(1)/.
data Allocator
  = Allocator
      !ArrayWriter
      {-# UNPACK #-} !Int

instance Semigroup Allocator where
  {-# INLINE (<>) #-}
  <> :: Allocator -> Allocator -> Allocator
(<>) (Allocator ArrayWriter
writer1 Int
estimatedArraySize1) (Allocator ArrayWriter
writer2 Int
estimatedArraySize2) =
    ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
writer Int
estimatedArraySize
    where
      writer :: ArrayWriter
writer = ArrayWriter
writer1 ArrayWriter -> ArrayWriter -> ArrayWriter
forall a. Semigroup a => a -> a -> a
<> ArrayWriter
writer2
      estimatedArraySize :: Int
estimatedArraySize = Int
estimatedArraySize1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
estimatedArraySize2

instance Monoid Allocator where
  {-# INLINE mempty #-}
  mempty :: Allocator
mempty = ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
forall a. Monoid a => a
mempty Int
0

-- |
-- Run the builder and pack the produced text into a new builder.
--
-- Useful to have around builders that you reuse,
-- because a forced builder is much faster,
-- since it's virtually a single call @memcopy@.
{-# INLINE force #-}
force :: Allocator -> Allocator
force :: Allocator -> Allocator
force = Text -> Allocator
text (Text -> Allocator)
-> (Allocator -> Text) -> Allocator -> Allocator
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Allocator -> Text
allocate

-- | Strict text.
{-# INLINEABLE text #-}
text :: Text -> Allocator
#if MIN_VERSION_text(2,0,0)
text :: Text -> Allocator
text text :: Text
text@(TextInternal.Text Array
array Int
offset Int
length) =
  ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
writer Int
length
  where
    writer :: ArrayWriter
writer =
      (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter ((forall s. MArray s -> Int -> ST s Int) -> ArrayWriter)
-> (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
forall a b. (a -> b) -> a -> b
$ \MArray s
builderArray Int
builderOffset -> do
        Int -> MArray s -> Int -> Array -> Int -> ST s ()
forall s. Int -> MArray s -> Int -> Array -> Int -> ST s ()
TextArray.copyI Int
length MArray s
builderArray Int
builderOffset Array
array Int
offset
        Int -> ST s Int
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> ST s Int) -> Int -> ST s Int
forall a b. (a -> b) -> a -> b
$ Int
builderOffset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
length
#else
text text@(TextInternal.Text array offset length) =
  Allocator writer length
  where
    writer =
      ArrayWriter $ \builderArray builderOffset -> do
        let builderOffsetAfter = builderOffset + length
        TextArray.copyI builderArray builderOffset array offset builderOffsetAfter
        return builderOffsetAfter
#endif

-- | ASCII byte string.
--
-- It's your responsibility to ensure that the bytes are in proper range,
-- otherwise the produced text will be broken.
{-# INLINEABLE asciiByteString #-}
asciiByteString :: ByteString -> Allocator
asciiByteString :: ByteString -> Allocator
asciiByteString ByteString
byteString =
  ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
action Int
length
  where
    length :: Int
length = ByteString -> Int
ByteString.length ByteString
byteString
    action :: ArrayWriter
action =
      (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter ((forall s. MArray s -> Int -> ST s Int) -> ArrayWriter)
-> (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
forall a b. (a -> b) -> a -> b
$ \MArray s
array ->
        let step :: Word8 -> (Int -> ST s Int) -> Int -> ST s Int
step Word8
byte Int -> ST s Int
next Int
index = do
              MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TextArray.unsafeWrite MArray s
array Int
index (Word8 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
byte)
              Int -> ST s Int
next (Int -> Int
forall a. Enum a => a -> a
succ Int
index)
         in (Word8 -> (Int -> ST s Int) -> Int -> ST s Int)
-> (Int -> ST s Int) -> ByteString -> Int -> ST s Int
forall a. (Word8 -> a -> a) -> a -> ByteString -> a
ByteString.foldr Word8 -> (Int -> ST s Int) -> Int -> ST s Int
step Int -> ST s Int
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
byteString

-- | Unicode character.
{-# INLINE char #-}
char :: Char -> Allocator
char :: Char -> Allocator
char = Int -> Allocator
unicodeCodePoint (Int -> Allocator) -> (Char -> Int) -> Char -> Allocator
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Char -> Int
ord

-- | Unicode code point.
{-# INLINE unicodeCodePoint #-}
unicodeCodePoint :: Int -> Allocator
#if MIN_VERSION_text(2,0,0)
unicodeCodePoint :: Int -> Allocator
unicodeCodePoint Int
x =
  Int -> Utf8View
Utf8View.unicodeCodePoint Int
x Word8 -> Allocator
utf8CodeUnits1 Word8 -> Word8 -> Allocator
utf8CodeUnits2 Word8 -> Word8 -> Word8 -> Allocator
utf8CodeUnits3 Word8 -> Word8 -> Word8 -> Word8 -> Allocator
utf8CodeUnits4
#else
unicodeCodePoint x =
  Utf16View.unicodeCodePoint x utf16CodeUnits1 utf16CodeUnits2
#endif

-- | Single code-unit UTF-8 character.
utf8CodeUnits1 :: Word8 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINEABLE utf8CodeUnits1 #-}
utf8CodeUnits1 :: Word8 -> Allocator
utf8CodeUnits1 Word8
unit1 = ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
writer Int
1 
  where
    writer :: ArrayWriter
writer = (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter ((forall s. MArray s -> Int -> ST s Int) -> ArrayWriter)
-> (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
forall a b. (a -> b) -> a -> b
$ \MArray s
array Int
offset ->
      MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TextArray.unsafeWrite MArray s
array Int
offset Word8
unit1
        ST s () -> Int -> ST s Int
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Int -> Int
forall a. Enum a => a -> a
succ Int
offset
#else
{-# INLINE utf8CodeUnits1 #-}
utf8CodeUnits1 unit1 =
  Utf16View.utf8CodeUnits1 unit1 utf16CodeUnits1 utf16CodeUnits2
#endif

-- | Double code-unit UTF-8 character.
utf8CodeUnits2 :: Word8 -> Word8 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINEABLE utf8CodeUnits2 #-}
utf8CodeUnits2 :: Word8 -> Word8 -> Allocator
utf8CodeUnits2 Word8
unit1 Word8
unit2 = ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
writer Int
2 
  where
    writer :: ArrayWriter
writer = (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter ((forall s. MArray s -> Int -> ST s Int) -> ArrayWriter)
-> (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
forall a b. (a -> b) -> a -> b
$ \MArray s
array Int
offset -> do
      MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TextArray.unsafeWrite MArray s
array Int
offset Word8
unit1
      MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TextArray.unsafeWrite MArray s
array (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Word8
unit2
      Int -> ST s Int
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> ST s Int) -> Int -> ST s Int
forall a b. (a -> b) -> a -> b
$ Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2
#else
{-# INLINE utf8CodeUnits2 #-}
utf8CodeUnits2 unit1 unit2 =
  Utf16View.utf8CodeUnits2 unit1 unit2 utf16CodeUnits1 utf16CodeUnits2
#endif

-- | Triple code-unit UTF-8 character.
utf8CodeUnits3 :: Word8 -> Word8 -> Word8 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINEABLE utf8CodeUnits3 #-}
utf8CodeUnits3 :: Word8 -> Word8 -> Word8 -> Allocator
utf8CodeUnits3 Word8
unit1 Word8
unit2 Word8
unit3 = ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
writer Int
3 
  where
    writer :: ArrayWriter
writer = (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter ((forall s. MArray s -> Int -> ST s Int) -> ArrayWriter)
-> (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
forall a b. (a -> b) -> a -> b
$ \MArray s
array Int
offset -> do
      MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TextArray.unsafeWrite MArray s
array Int
offset Word8
unit1
      MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TextArray.unsafeWrite MArray s
array (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Word8
unit2
      MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TextArray.unsafeWrite MArray s
array (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) Word8
unit3
      Int -> ST s Int
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> ST s Int) -> Int -> ST s Int
forall a b. (a -> b) -> a -> b
$ Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3
#else
{-# INLINE utf8CodeUnits3 #-}
utf8CodeUnits3 unit1 unit2 unit3 =
  Utf16View.utf8CodeUnits3 unit1 unit2 unit3 utf16CodeUnits1 utf16CodeUnits2
#endif

-- | UTF-8 character out of 4 code units.
utf8CodeUnits4 :: Word8 -> Word8 -> Word8 -> Word8 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINEABLE utf8CodeUnits4 #-}
utf8CodeUnits4 :: Word8 -> Word8 -> Word8 -> Word8 -> Allocator
utf8CodeUnits4 Word8
unit1 Word8
unit2 Word8
unit3 Word8
unit4 = ArrayWriter -> Int -> Allocator
Allocator ArrayWriter
writer Int
4 
  where
    writer :: ArrayWriter
writer = (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
ArrayWriter ((forall s. MArray s -> Int -> ST s Int) -> ArrayWriter)
-> (forall s. MArray s -> Int -> ST s Int) -> ArrayWriter
forall a b. (a -> b) -> a -> b
$ \MArray s
array Int
offset -> do
      MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TextArray.unsafeWrite MArray s
array Int
offset Word8
unit1
      MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TextArray.unsafeWrite MArray s
array (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Word8
unit2
      MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TextArray.unsafeWrite MArray s
array (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) Word8
unit3
      MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TextArray.unsafeWrite MArray s
array (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3) Word8
unit4
      Int -> ST s Int
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> ST s Int) -> Int -> ST s Int
forall a b. (a -> b) -> a -> b
$ Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
4
#else
{-# INLINE utf8CodeUnits4 #-}
utf8CodeUnits4 unit1 unit2 unit3 unit4 =
  Utf16View.utf8CodeUnits4 unit1 unit2 unit3 unit4 utf16CodeUnits1 utf16CodeUnits2
#endif

-- | Single code-unit UTF-16 character.
utf16CodeUnits1 :: Word16 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINE utf16CodeUnits1 #-}
utf16CodeUnits1 :: Word16 -> Allocator
utf16CodeUnits1 = Int -> Allocator
unicodeCodePoint (Int -> Allocator) -> (Word16 -> Int) -> Word16 -> Allocator
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
#else
{-# INLINEABLE utf16CodeUnits1 #-}
utf16CodeUnits1 unit =
  Allocator writer 1
  where
    writer =
      ArrayWriter $ \array offset ->
        TextArray.unsafeWrite array offset unit
          $> succ offset
#endif

-- | Double code-unit UTF-16 character.
utf16CodeUnits2 :: Word16 -> Word16 -> Allocator
#if MIN_VERSION_text(2,0,0)
{-# INLINE utf16CodeUnits2 #-}
utf16CodeUnits2 :: Word16 -> Word16 -> Allocator
utf16CodeUnits2 Word16
unit1 Word16
unit2 = Int -> Allocator
unicodeCodePoint Int
cp
  where
    cp :: Int
cp = (((Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
unit1 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3FF) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
10) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
unit2 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3FF)) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x10000
#else
{-# INLINEABLE utf16CodeUnits2 #-}
utf16CodeUnits2 unit1 unit2 =
  Allocator writer 2
  where
    writer =
      ArrayWriter $ \array offset -> do
        TextArray.unsafeWrite array offset unit1
        TextArray.unsafeWrite array (succ offset) unit2
        return $ offset + 2
#endif