{-# LANGUAGE CPP, BangPatterns, PatternGuards #-}
{-# LANGUAGE DeriveDataTypeable, ScopedTypeVariables #-}

module Codec.Archive.Tar.Index.IntTrie (

  IntTrie,
  construct,
  toList,

  IntTrieBuilder,
  empty,
  insert,
  finalise,
  unfinalise,

  lookup,
  TrieLookup(..),

  serialise,
  serialiseSize,
  deserialise,

#ifdef TESTS
  test1, test2, test3,
  ValidPaths(..),
  prop_lookup,
  prop_completions,
  prop_lookup_mono,
  prop_completions_mono,
  prop_construct_toList,
  prop_finalise_unfinalise,
  prop_serialise_deserialise,
  prop_serialiseSize,
#endif
 ) where

import Prelude hiding (lookup)

import Data.Typeable (Typeable)

import qualified Data.Array.Unboxed as A
import Data.Array.IArray  ((!))
import qualified Data.Bits as Bits
import Data.Word (Word32)
import Data.Bits
import Data.Monoid (Monoid(..))
#if (MIN_VERSION_base(4,5,0))
import Data.Monoid ((<>))
#endif
import qualified Data.ByteString        as BS
import qualified Data.ByteString.Lazy   as LBS
import qualified Data.ByteString.Unsafe as BS
#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
import Data.ByteString.Builder          as BS
#else
import Data.ByteString.Lazy.Builder     as BS
#endif
import Control.Exception (assert)
#if MIN_VERSION_containers(0,5,0)
import qualified Data.Map.Strict        as Map
import qualified Data.IntMap.Strict     as IntMap
import Data.IntMap.Strict (IntMap)
#else
import qualified Data.Map               as Map
import qualified Data.IntMap            as IntMap
import Data.IntMap (IntMap)
#endif

import Data.List hiding (lookup, insert)
import Data.Function (on)

#ifdef TESTS
import Test.QuickCheck
import Control.Applicative ((<$>), (<*>))
#endif


-- | A compact mapping from sequences of nats to nats.
--
-- NOTE: The tries in this module have values /only/ at the leaves (which
-- correspond to files), they do not have values at the branch points (which
-- correspond to directories).
newtype IntTrie k v = IntTrie (A.UArray Word32 Word32)
    deriving (IntTrie k v -> IntTrie k v -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall k v. IntTrie k v -> IntTrie k v -> Bool
/= :: IntTrie k v -> IntTrie k v -> Bool
$c/= :: forall k v. IntTrie k v -> IntTrie k v -> Bool
== :: IntTrie k v -> IntTrie k v -> Bool
$c== :: forall k v. IntTrie k v -> IntTrie k v -> Bool
Eq, Int -> IntTrie k v -> ShowS
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall k v. Int -> IntTrie k v -> ShowS
forall k v. [IntTrie k v] -> ShowS
forall k v. IntTrie k v -> String
showList :: [IntTrie k v] -> ShowS
$cshowList :: forall k v. [IntTrie k v] -> ShowS
show :: IntTrie k v -> String
$cshow :: forall k v. IntTrie k v -> String
showsPrec :: Int -> IntTrie k v -> ShowS
$cshowsPrec :: forall k v. Int -> IntTrie k v -> ShowS
Show, Typeable)


-- Compact, read-only implementation of a trie. It's intended for use with file
-- paths, but we do that via string ids.

#ifdef TESTS
-- Example mapping:
--
example0 :: [(FilePath, Int)]
example0 =
  [("foo-1.0/foo-1.0.cabal", 512)   -- tar block 1
  ,("foo-1.0/LICENSE",       2048)  -- tar block 4
  ,("foo-1.0/Data/Foo.hs",   4096)] -- tar block 8

-- After converting path components to integers this becomes:
--
example1 :: [([Word32], Word32)]
example1 =
  [([1,2],   512)
  ,([1,3],   2048)
  ,([1,4,5], 4096)]

-- As a trie this looks like:

--  [ (1, *) ]
--        |
--        [ (2, 512), (3, 1024), (4, *) ]
--                                   |
--                                   [ (5, 4096) ]

-- We use an intermediate trie representation

mktrie :: [(Int, TrieNode k v)] -> IntTrieBuilder k v
mkleaf :: (Enum k, Enum v) => k -> v                  -> (Int, TrieNode k v)
mknode ::  Enum k          => k -> IntTrieBuilder k v -> (Int, TrieNode k v)

mktrie = IntTrieBuilder . IntMap.fromList
mkleaf k v = (fromEnum k, TrieLeaf (enumToWord32 v))
mknode k t = (fromEnum k, TrieNode t) 

example2 :: IntTrieBuilder Word32 Word32
example2 = mktrie [ mknode 1 t1 ]
  where
    t1   = mktrie [ mkleaf 2 512, mkleaf 3 2048, mknode 4 t2 ]
    t2   = mktrie [ mkleaf 5 4096 ]


example2' :: IntTrieBuilder Word32 Word32
example2' = mktrie [ mknode 0 t1 ]
  where
    t1   = mktrie [ mknode 3 t2 ]
    t2   = mktrie [ mknode 1 t3, mknode 2 t4 ]
    t3   = mktrie [ mkleaf 4 10608 ]
    t4   = mktrie [ mkleaf 4 10612 ]
{-
0: [1,N0,3]

  3: [1,N3,6]

   6: [2,N1,N2,11,12]

     11: [1,4,10608]
     14: [1,4,10612]
-}

example2'' :: IntTrieBuilder Word32 Word32
example2'' = mktrie [ mknode 1 t1, mknode 2 t2 ]
  where
    t1   = mktrie [ mkleaf 4 10608 ]
    t2   = mktrie [ mkleaf 4 10612 ]

example2''' :: IntTrieBuilder Word32 Word32
example2''' = mktrie [ mknode 0 t3 ]
  where
    t3  = mktrie [ mknode 4 t8, mknode 6 t11 ]
    t8  = mktrie [ mknode 1 t14 ]
    t11 = mktrie [ mkleaf 5 10605 ]
    t14 = mktrie [ mknode 2 t19, mknode 3 t22 ]
    t19 = mktrie [ mkleaf 7 10608 ]
    t22 = mktrie [ mkleaf 7 10612 ]
{-
 0: [1,N0,3]
 3: [2,N4,N6,8,11]
 8: [1,N1,11]
11: [1,5,10605]
14: [2,N2,N3,16,19]
19: [1,7,10608]
22: [1,7,10612]
-}

-- We convert from the 'Paths' to the 'IntTrieBuilder' using 'inserts':
--
test1 = example2 == inserts example1 empty
#endif

-- Each node has a size and a sequence of keys followed by an equal length
-- sequnce of corresponding entries. Since we're going to flatten this into
-- a single array then we will need to replace the trie structure with pointers
-- represented as array offsets.

-- Each node is a pair of arrays, one of keys and one of Either value pointer.
-- We need to distinguish values from internal pointers. We use a tag bit:
--
tagLeaf, tagNode, untag :: Word32 -> Word32
tagLeaf :: Word32 -> Word32
tagLeaf = forall a. a -> a
id
tagNode :: Word32 -> Word32
tagNode = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. Bits a => a -> Int -> a
Bits.setBit   Int
31
untag :: Word32 -> Word32
untag   = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. Bits a => a -> Int -> a
Bits.clearBit Int
31

isNode :: Word32 -> Bool
isNode :: Word32 -> Bool
isNode = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. Bits a => a -> Int -> Bool
Bits.testBit Int
31

-- So the overall array form of the above trie is:
--
-- offset:   0   1    2    3   4  5  6    7    8     9     10  11  12
-- array:  [ 1 | N1 | 3 ][ 3 | 2, 3, N4 | 512, 2048, 10 ][ 1 | 5 | 4096 ]
--                     \__/                           \___/

#ifdef TESTS
example3 :: [Word32]
example3 =
 [1, tagNode 1,
     3,
  3, tagLeaf 2, tagLeaf 3, tagNode 4,
     512,       2048,      10,
  1, tagLeaf 5,
     4096
 ]

-- We get the array form by using flattenTrie:

test2 = example3 == flattenTrie example2

example4 :: IntTrie Int Int
example4 = IntTrie (mkArray example3)

mkArray :: [Word32] -> A.UArray Word32 Word32
mkArray xs = A.listArray (0, fromIntegral (length xs) - 1) xs

test3 = case lookup example4 [1] of
          Just (Completions [(2,_),(3,_),(4,_)]) -> True
          _                          -> False

test1, test2, test3 :: Bool
#endif

-------------------------------------
-- Decoding the trie array form
--

completionsFrom :: (Enum k, Enum v) => IntTrie k v -> Word32 -> Completions k v
completionsFrom :: forall k v.
(Enum k, Enum v) =>
IntTrie k v -> Word32 -> Completions k v
completionsFrom trie :: IntTrie k v
trie@(IntTrie UArray Word32 Word32
arr) Word32
nodeOff =
    [ (forall n. Enum n => Word32 -> n
word32ToEnum (Word32 -> Word32
untag Word32
key), TrieLookup k v
next)
    | Word32
keyOff <- [Word32
keysStart..Word32
keysEnd]
    , let key :: Word32
key   = UArray Word32 Word32
arr forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! Word32
keyOff
          entry :: Word32
entry = UArray Word32 Word32
arr forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! (Word32
keyOff forall a. Num a => a -> a -> a
+ Word32
nodeSize)
          next :: TrieLookup k v
next | Word32 -> Bool
isNode Word32
key = forall k v. Completions k v -> TrieLookup k v
Completions (forall k v.
(Enum k, Enum v) =>
IntTrie k v -> Word32 -> Completions k v
completionsFrom IntTrie k v
trie Word32
entry)
               | Bool
otherwise  = forall k v. v -> TrieLookup k v
Entry (forall n. Enum n => Word32 -> n
word32ToEnum Word32
entry)
    ]
  where
    nodeSize :: Word32
nodeSize  = UArray Word32 Word32
arr forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! Word32
nodeOff
    keysStart :: Word32
keysStart = Word32
nodeOff forall a. Num a => a -> a -> a
+ Word32
1
    keysEnd :: Word32
keysEnd   = Word32
nodeOff forall a. Num a => a -> a -> a
+ Word32
nodeSize

-- | Convert the trie to a list
--
-- This is the left inverse to 'construct' (modulo ordering).
toList :: forall k v. (Enum k, Enum v) => IntTrie k v -> [([k], v)]
toList :: forall k v. (Enum k, Enum v) => IntTrie k v -> [([k], v)]
toList = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ([k] -> (k, TrieLookup k v) -> [([k], v)]
aux []) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall k v.
(Enum k, Enum v) =>
IntTrie k v -> Word32 -> Completions k v
`completionsFrom` Word32
0)
  where
    aux :: [k] -> (k, TrieLookup k v) -> [([k], v)]
    aux :: [k] -> (k, TrieLookup k v) -> [([k], v)]
aux [k]
ks (k
k, Entry v
v)        = [(forall a. [a] -> [a]
reverse (k
kforall a. a -> [a] -> [a]
:[k]
ks), v
v)]
    aux [k]
ks (k
k, Completions Completions k v
cs) = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ([k] -> (k, TrieLookup k v) -> [([k], v)]
aux (k
kforall a. a -> [a] -> [a]
:[k]
ks)) Completions k v
cs

-------------------------------------
-- Toplevel trie array construction
--

-- So constructing the 'IntTrie' as a whole is just a matter of stringing
-- together all the bits

-- | Build an 'IntTrie' from a bunch of (key, value) pairs, where the keys
-- are sequences.
--
construct :: (Enum k, Enum v) => [([k], v)] -> IntTrie k v
construct :: forall k v. (Enum k, Enum v) => [([k], v)] -> IntTrie k v
construct = forall k v. IntTrieBuilder k v -> IntTrie k v
finalise forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b c. (a -> b -> c) -> b -> a -> c
flip forall k v.
(Enum k, Enum v) =>
[([k], v)] -> IntTrieBuilder k v -> IntTrieBuilder k v
inserts forall k v. IntTrieBuilder k v
empty


---------------------------------
-- Looking up in the trie array
--

data TrieLookup  k v = Entry !v | Completions (Completions k v) deriving Int -> TrieLookup k v -> ShowS
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall k v. (Show v, Show k) => Int -> TrieLookup k v -> ShowS
forall k v. (Show v, Show k) => [TrieLookup k v] -> ShowS
forall k v. (Show v, Show k) => TrieLookup k v -> String
showList :: [TrieLookup k v] -> ShowS
$cshowList :: forall k v. (Show v, Show k) => [TrieLookup k v] -> ShowS
show :: TrieLookup k v -> String
$cshow :: forall k v. (Show v, Show k) => TrieLookup k v -> String
showsPrec :: Int -> TrieLookup k v -> ShowS
$cshowsPrec :: forall k v. (Show v, Show k) => Int -> TrieLookup k v -> ShowS
Show
type Completions k v = [(k, TrieLookup k v)]

lookup :: forall k v. (Enum k, Enum v) => IntTrie k v -> [k] -> Maybe (TrieLookup k v)
lookup :: forall k v.
(Enum k, Enum v) =>
IntTrie k v -> [k] -> Maybe (TrieLookup k v)
lookup trie :: IntTrie k v
trie@(IntTrie UArray Word32 Word32
arr) = Word32 -> [k] -> Maybe (TrieLookup k v)
go Word32
0
  where
    go :: Word32 -> [k] -> Maybe (TrieLookup k v)
    go :: Word32 -> [k] -> Maybe (TrieLookup k v)
go Word32
nodeOff []     = forall a. a -> Maybe a
Just (Word32 -> TrieLookup k v
completions Word32
nodeOff)
    go Word32
nodeOff (k
k:[k]
ks) = case Word32 -> Word32 -> Maybe Word32
search Word32
nodeOff (Word32 -> Word32
tagLeaf Word32
k') of
      Just Word32
entryOff
        | forall (t :: * -> *) a. Foldable t => t a -> Bool
null [k]
ks   -> forall a. a -> Maybe a
Just (forall {v} {k}. Enum v => Word32 -> TrieLookup k v
entry Word32
entryOff)
        | Bool
otherwise -> forall a. Maybe a
Nothing
      Maybe Word32
Nothing       -> case Word32 -> Word32 -> Maybe Word32
search Word32
nodeOff (Word32 -> Word32
tagNode Word32
k') of
        Maybe Word32
Nothing       -> forall a. Maybe a
Nothing
        Just Word32
entryOff -> Word32 -> [k] -> Maybe (TrieLookup k v)
go (UArray Word32 Word32
arr forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! Word32
entryOff) [k]
ks
      where
        k' :: Word32
k' = forall n. Enum n => n -> Word32
enumToWord32 k
k

    entry :: Word32 -> TrieLookup k v
entry       Word32
entryOff = forall k v. v -> TrieLookup k v
Entry (forall n. Enum n => Word32 -> n
word32ToEnum (UArray Word32 Word32
arr forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! Word32
entryOff))
    completions :: Word32 -> TrieLookup k v
completions Word32
nodeOff  = forall k v. Completions k v -> TrieLookup k v
Completions (forall k v.
(Enum k, Enum v) =>
IntTrie k v -> Word32 -> Completions k v
completionsFrom IntTrie k v
trie Word32
nodeOff)

    search :: Word32 -> Word32 -> Maybe Word32
    search :: Word32 -> Word32 -> Maybe Word32
search Word32
nodeOff Word32
key = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a. Num a => a -> a -> a
+Word32
nodeSize) (Word32 -> Word32 -> Word32 -> Maybe Word32
bsearch Word32
keysStart Word32
keysEnd Word32
key)
      where
        nodeSize :: Word32
nodeSize  = UArray Word32 Word32
arr forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! Word32
nodeOff
        keysStart :: Word32
keysStart = Word32
nodeOff forall a. Num a => a -> a -> a
+ Word32
1
        keysEnd :: Word32
keysEnd   = Word32
nodeOff forall a. Num a => a -> a -> a
+ Word32
nodeSize

    bsearch :: Word32 -> Word32 -> Word32 -> Maybe Word32
    bsearch :: Word32 -> Word32 -> Word32 -> Maybe Word32
bsearch Word32
a Word32
b Word32
key
      | Word32
a forall a. Ord a => a -> a -> Bool
> Word32
b     = forall a. Maybe a
Nothing
      | Bool
otherwise = case forall a. Ord a => a -> a -> Ordering
compare Word32
key (UArray Word32 Word32
arr forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! Word32
mid) of
          Ordering
LT -> Word32 -> Word32 -> Word32 -> Maybe Word32
bsearch Word32
a (Word32
midforall a. Num a => a -> a -> a
-Word32
1) Word32
key
          Ordering
EQ -> forall a. a -> Maybe a
Just Word32
mid
          Ordering
GT -> Word32 -> Word32 -> Word32 -> Maybe Word32
bsearch (Word32
midforall a. Num a => a -> a -> a
+Word32
1) Word32
b Word32
key
      where mid :: Word32
mid = (Word32
a forall a. Num a => a -> a -> a
+ Word32
b) forall a. Integral a => a -> a -> a
`div` Word32
2


enumToWord32 :: Enum n => n -> Word32
enumToWord32 :: forall n. Enum n => n -> Word32
enumToWord32 = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Enum a => a -> Int
fromEnum

word32ToEnum :: Enum n => Word32 -> n
word32ToEnum :: forall n. Enum n => Word32 -> n
word32ToEnum = forall a. Enum a => Int -> a
toEnum forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (Integral a, Num b) => a -> b
fromIntegral


-------------------------
-- Building Tries
--

newtype IntTrieBuilder k v = IntTrieBuilder (IntMap (TrieNode k v))
  deriving (Int -> IntTrieBuilder k v -> ShowS
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall k v. Int -> IntTrieBuilder k v -> ShowS
forall k v. [IntTrieBuilder k v] -> ShowS
forall k v. IntTrieBuilder k v -> String
showList :: [IntTrieBuilder k v] -> ShowS
$cshowList :: forall k v. [IntTrieBuilder k v] -> ShowS
show :: IntTrieBuilder k v -> String
$cshow :: forall k v. IntTrieBuilder k v -> String
showsPrec :: Int -> IntTrieBuilder k v -> ShowS
$cshowsPrec :: forall k v. Int -> IntTrieBuilder k v -> ShowS
Show, IntTrieBuilder k v -> IntTrieBuilder k v -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall k v. IntTrieBuilder k v -> IntTrieBuilder k v -> Bool
/= :: IntTrieBuilder k v -> IntTrieBuilder k v -> Bool
$c/= :: forall k v. IntTrieBuilder k v -> IntTrieBuilder k v -> Bool
== :: IntTrieBuilder k v -> IntTrieBuilder k v -> Bool
$c== :: forall k v. IntTrieBuilder k v -> IntTrieBuilder k v -> Bool
Eq)

data TrieNode k v = TrieLeaf {-# UNPACK #-} !Word32
                  | TrieNode !(IntTrieBuilder k v)
  deriving (Int -> TrieNode k v -> ShowS
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall k v. Int -> TrieNode k v -> ShowS
forall k v. [TrieNode k v] -> ShowS
forall k v. TrieNode k v -> String
showList :: [TrieNode k v] -> ShowS
$cshowList :: forall k v. [TrieNode k v] -> ShowS
show :: TrieNode k v -> String
$cshow :: forall k v. TrieNode k v -> String
showsPrec :: Int -> TrieNode k v -> ShowS
$cshowsPrec :: forall k v. Int -> TrieNode k v -> ShowS
Show, TrieNode k v -> TrieNode k v -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall k v. TrieNode k v -> TrieNode k v -> Bool
/= :: TrieNode k v -> TrieNode k v -> Bool
$c/= :: forall k v. TrieNode k v -> TrieNode k v -> Bool
== :: TrieNode k v -> TrieNode k v -> Bool
$c== :: forall k v. TrieNode k v -> TrieNode k v -> Bool
Eq)

empty :: IntTrieBuilder k v
empty :: forall k v. IntTrieBuilder k v
empty = forall k v. IntMap (TrieNode k v) -> IntTrieBuilder k v
IntTrieBuilder forall a. IntMap a
IntMap.empty

insert :: (Enum k, Enum v) => [k] -> v
       -> IntTrieBuilder k v -> IntTrieBuilder k v
insert :: forall k v.
(Enum k, Enum v) =>
[k] -> v -> IntTrieBuilder k v -> IntTrieBuilder k v
insert []    v
_v IntTrieBuilder k v
t = IntTrieBuilder k v
t
insert (k
k:[k]
ks) v
v IntTrieBuilder k v
t = forall k v.
Int -> [Int] -> Word32 -> IntTrieBuilder k v -> IntTrieBuilder k v
insertTrie (forall a. Enum a => a -> Int
fromEnum k
k) (forall a b. (a -> b) -> [a] -> [b]
map forall a. Enum a => a -> Int
fromEnum [k]
ks) (forall n. Enum n => n -> Word32
enumToWord32 v
v) IntTrieBuilder k v
t

insertTrie :: Int -> [Int] -> Word32
           -> IntTrieBuilder k v -> IntTrieBuilder k v
insertTrie :: forall k v.
Int -> [Int] -> Word32 -> IntTrieBuilder k v -> IntTrieBuilder k v
insertTrie Int
k [Int]
ks Word32
v (IntTrieBuilder IntMap (TrieNode k v)
t) =
    forall k v. IntMap (TrieNode k v) -> IntTrieBuilder k v
IntTrieBuilder forall a b. (a -> b) -> a -> b
$
      forall a. (Maybe a -> Maybe a) -> Int -> IntMap a -> IntMap a
IntMap.alter (\Maybe (TrieNode k v)
t' -> forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$! forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall k v. [Int] -> Word32 -> TrieNode k v
freshTrieNode  [Int]
ks Word32
v)
                                         (forall k v. [Int] -> Word32 -> TrieNode k v -> TrieNode k v
insertTrieNode [Int]
ks Word32
v) Maybe (TrieNode k v)
t')
                   Int
k IntMap (TrieNode k v)
t

insertTrieNode :: [Int] -> Word32 -> TrieNode k v -> TrieNode k v
insertTrieNode :: forall k v. [Int] -> Word32 -> TrieNode k v -> TrieNode k v
insertTrieNode []     Word32
v  TrieNode k v
_           = forall k v. Word32 -> TrieNode k v
TrieLeaf Word32
v
insertTrieNode (Int
k:[Int]
ks) Word32
v (TrieLeaf Word32
_) = forall k v. IntTrieBuilder k v -> TrieNode k v
TrieNode (forall k v. Int -> [Int] -> Word32 -> IntTrieBuilder k v
freshTrie  Int
k [Int]
ks Word32
v)
insertTrieNode (Int
k:[Int]
ks) Word32
v (TrieNode IntTrieBuilder k v
t) = forall k v. IntTrieBuilder k v -> TrieNode k v
TrieNode (forall k v.
Int -> [Int] -> Word32 -> IntTrieBuilder k v -> IntTrieBuilder k v
insertTrie Int
k [Int]
ks Word32
v IntTrieBuilder k v
t)

freshTrie :: Int -> [Int] -> Word32 -> IntTrieBuilder k v
freshTrie :: forall k v. Int -> [Int] -> Word32 -> IntTrieBuilder k v
freshTrie Int
k []      Word32
v =
  forall k v. IntMap (TrieNode k v) -> IntTrieBuilder k v
IntTrieBuilder (forall a. Int -> a -> IntMap a
IntMap.singleton Int
k (forall k v. Word32 -> TrieNode k v
TrieLeaf Word32
v))
freshTrie Int
k (Int
k':[Int]
ks) Word32
v =
  forall k v. IntMap (TrieNode k v) -> IntTrieBuilder k v
IntTrieBuilder (forall a. Int -> a -> IntMap a
IntMap.singleton Int
k (forall k v. IntTrieBuilder k v -> TrieNode k v
TrieNode (forall k v. Int -> [Int] -> Word32 -> IntTrieBuilder k v
freshTrie Int
k' [Int]
ks Word32
v)))

freshTrieNode :: [Int] -> Word32 -> TrieNode k v
freshTrieNode :: forall k v. [Int] -> Word32 -> TrieNode k v
freshTrieNode []     Word32
v = forall k v. Word32 -> TrieNode k v
TrieLeaf Word32
v
freshTrieNode (Int
k:[Int]
ks) Word32
v = forall k v. IntTrieBuilder k v -> TrieNode k v
TrieNode (forall k v. Int -> [Int] -> Word32 -> IntTrieBuilder k v
freshTrie Int
k [Int]
ks Word32
v)

inserts :: (Enum k, Enum v) => [([k], v)]
        -> IntTrieBuilder k v -> IntTrieBuilder k v
inserts :: forall k v.
(Enum k, Enum v) =>
[([k], v)] -> IntTrieBuilder k v -> IntTrieBuilder k v
inserts [([k], v)]
kvs IntTrieBuilder k v
t = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\IntTrieBuilder k v
t' ([k]
ks, v
v) -> forall k v.
(Enum k, Enum v) =>
[k] -> v -> IntTrieBuilder k v -> IntTrieBuilder k v
insert [k]
ks v
v IntTrieBuilder k v
t') IntTrieBuilder k v
t [([k], v)]
kvs

finalise :: IntTrieBuilder k v -> IntTrie k v
finalise :: forall k v. IntTrieBuilder k v -> IntTrie k v
finalise IntTrieBuilder k v
trie =
    forall k v. UArray Word32 Word32 -> IntTrie k v
IntTrie forall a b. (a -> b) -> a -> b
$
      forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
A.listArray (Word32
0, forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall k v. IntTrieBuilder k v -> Int
flatTrieLength IntTrieBuilder k v
trie) forall a. Num a => a -> a -> a
- Word32
1)
                  (forall k v. IntTrieBuilder k v -> [Word32]
flattenTrie IntTrieBuilder k v
trie)

unfinalise :: (Enum k, Enum v) => IntTrie k v -> IntTrieBuilder k v
unfinalise :: forall k v. (Enum k, Enum v) => IntTrie k v -> IntTrieBuilder k v
unfinalise IntTrie k v
trie =
    forall {k} {n} {k} {v}.
(Enum k, Enum n) =>
Completions k n -> IntTrieBuilder k v
go (forall k v.
(Enum k, Enum v) =>
IntTrie k v -> Word32 -> Completions k v
completionsFrom IntTrie k v
trie Word32
0)
  where
    go :: Completions k n -> IntTrieBuilder k v
go Completions k n
kns =
      forall k v. IntMap (TrieNode k v) -> IntTrieBuilder k v
IntTrieBuilder forall a b. (a -> b) -> a -> b
$
        forall a. [(Int, a)] -> IntMap a
IntMap.fromList
          [ (forall a. Enum a => a -> Int
fromEnum k
k, TrieNode k v
t)
          | (k
k, TrieLookup k n
n) <- Completions k n
kns
          , let t :: TrieNode k v
t = case TrieLookup k n
n of
                      Entry       n
v    -> forall k v. Word32 -> TrieNode k v
TrieLeaf (forall n. Enum n => n -> Word32
enumToWord32 n
v)
                      Completions Completions k n
kns' -> forall k v. IntTrieBuilder k v -> TrieNode k v
TrieNode (Completions k n -> IntTrieBuilder k v
go Completions k n
kns')
          ]

---------------------------------
-- Flattening Tries
--

type Offset = Int

flatTrieLength :: IntTrieBuilder k v -> Int
flatTrieLength :: forall k v. IntTrieBuilder k v -> Int
flatTrieLength (IntTrieBuilder IntMap (TrieNode k v)
tns) =
    Int
1
  forall a. Num a => a -> a -> a
+ Int
2 forall a. Num a => a -> a -> a
* forall a. IntMap a -> Int
IntMap.size IntMap (TrieNode k v)
tns
  forall a. Num a => a -> a -> a
+ forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [ forall k v. IntTrieBuilder k v -> Int
flatTrieLength IntTrieBuilder k v
n | TrieNode IntTrieBuilder k v
n <- forall a. IntMap a -> [a]
IntMap.elems IntMap (TrieNode k v)
tns ]

-- This is a breadth-first traversal. We keep a list of the tries that we are
-- to write out next. Each of these have an offset allocated to them at the
-- time we put them into the list. We keep a running offset so we know where
-- to allocate next.
--
flattenTrie :: IntTrieBuilder k v -> [Word32]
flattenTrie :: forall k v. IntTrieBuilder k v -> [Word32]
flattenTrie IntTrieBuilder k v
trie = forall k v. Q (IntTrieBuilder k v) -> Int -> [Word32]
go (forall a. [a] -> Q a
queue [IntTrieBuilder k v
trie]) (forall k v. IntTrieBuilder k v -> Int
size IntTrieBuilder k v
trie)
  where
    size :: IntTrieBuilder k v -> Int
size (IntTrieBuilder IntMap (TrieNode k v)
tns) = Int
1 forall a. Num a => a -> a -> a
+ Int
2 forall a. Num a => a -> a -> a
* forall a. IntMap a -> Int
IntMap.size IntMap (TrieNode k v)
tns

    go :: Q (IntTrieBuilder k v) -> Offset -> [Word32]
    go :: forall k v. Q (IntTrieBuilder k v) -> Int -> [Word32]
go Q (IntTrieBuilder k v)
todo !Int
offset =
      case forall a. Q a -> Maybe (a, Q a)
dequeue Q (IntTrieBuilder k v)
todo of
        Maybe (IntTrieBuilder k v, Q (IntTrieBuilder k v))
Nothing                   -> []
        Just (IntTrieBuilder IntMap (TrieNode k v)
tnodes, Q (IntTrieBuilder k v)
tries) ->
            [Word32]
flat forall a. [a] -> [a] -> [a]
++ forall k v. Q (IntTrieBuilder k v) -> Int -> [Word32]
go Q (IntTrieBuilder k v)
tries' Int
offset'
          where
            !count :: Int
count = forall a. IntMap a -> Int
IntMap.size IntMap (TrieNode k v)
tnodes
            flat :: [Word32]
flat   = forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
count
                   forall a. a -> [a] -> [a]
: forall k a. Map k a -> [k]
Map.keys  Map Word32 Word32
keysValues
                  forall a. [a] -> [a] -> [a]
++ forall k a. Map k a -> [a]
Map.elems Map Word32 Word32
keysValues
            (!Int
offset', !Map Word32 Word32
keysValues, !Q (IntTrieBuilder k v)
tries') =
#if MIN_VERSION_containers(0,4,2)
              forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IntMap.foldlWithKey' forall k v.
(Int, Map Word32 Word32, Q (IntTrieBuilder k v))
-> Int
-> TrieNode k v
-> (Int, Map Word32 Word32, Q (IntTrieBuilder k v))
accumNodes
                                   (Int
offset, forall k a. Map k a
Map.empty, Q (IntTrieBuilder k v)
tries)
                                   IntMap (TrieNode k v)
tnodes
#else
              foldl' (\a (k,v) -> accumNodes a k v)
                     (offset, Map.empty, tries)
                     (IntMap.toList tnodes)
#endif

    accumNodes :: (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v))
               -> Int -> TrieNode k v
               -> (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v))
    accumNodes :: forall k v.
(Int, Map Word32 Word32, Q (IntTrieBuilder k v))
-> Int
-> TrieNode k v
-> (Int, Map Word32 Word32, Q (IntTrieBuilder k v))
accumNodes (!Int
off, !Map Word32 Word32
kvs, !Q (IntTrieBuilder k v)
tries) !Int
k (TrieLeaf Word32
v) =
        (Int
off, Map Word32 Word32
kvs', Q (IntTrieBuilder k v)
tries)
      where
        kvs' :: Map Word32 Word32
kvs' = forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert (Word32 -> Word32
tagLeaf (Int -> Word32
int2Word32 Int
k)) Word32
v Map Word32 Word32
kvs

    accumNodes (!Int
off, !Map Word32 Word32
kvs, !Q (IntTrieBuilder k v)
tries) !Int
k (TrieNode IntTrieBuilder k v
t) =
        (Int
off forall a. Num a => a -> a -> a
+ forall k v. IntTrieBuilder k v -> Int
size IntTrieBuilder k v
t, Map Word32 Word32
kvs', Q (IntTrieBuilder k v)
tries')
      where
        kvs' :: Map Word32 Word32
kvs'   = forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert (Word32 -> Word32
tagNode (Int -> Word32
int2Word32 Int
k)) (Int -> Word32
int2Word32 Int
off) Map Word32 Word32
kvs
        tries' :: Q (IntTrieBuilder k v)
tries' = forall a. Q a -> a -> Q a
enqueue Q (IntTrieBuilder k v)
tries IntTrieBuilder k v
t

data Q a = Q [a] [a]

queue :: [a] -> Q a
queue :: forall a. [a] -> Q a
queue [a]
xs = forall a. [a] -> [a] -> Q a
Q [a]
xs []

enqueue :: Q a -> a -> Q a
enqueue :: forall a. Q a -> a -> Q a
enqueue (Q [a]
front  [a]
back) a
x = forall a. [a] -> [a] -> Q a
Q [a]
front (a
x forall a. a -> [a] -> [a]
: [a]
back)

dequeue :: Q a -> Maybe (a, Q a)
dequeue :: forall a. Q a -> Maybe (a, Q a)
dequeue (Q (a
x:[a]
xs) [a]
back)    = forall a. a -> Maybe a
Just (a
x, forall a. [a] -> [a] -> Q a
Q [a]
xs [a]
back)
dequeue (Q []     [a]
back)    = case forall a. [a] -> [a]
reverse [a]
back of
                               a
x:[a]
xs -> forall a. a -> Maybe a
Just (a
x, forall a. [a] -> [a] -> Q a
Q [a]
xs [])
                               []   -> forall a. Maybe a
Nothing

int2Word32 :: Int -> Word32
int2Word32 :: Int -> Word32
int2Word32 = forall a b. (Integral a, Num b) => a -> b
fromIntegral


-------------------------
-- (de)serialisation
--

serialise :: IntTrie k v -> BS.Builder
serialise :: forall k v. IntTrie k v -> Builder
serialise (IntTrie UArray Word32 Word32
arr) =
    let (Word32
_, !Word32
ixEnd) = forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> (i, i)
A.bounds UArray Word32 Word32
arr in
    Word32 -> Builder
BS.word32BE (Word32
ixEndforall a. Num a => a -> a -> a
+Word32
1)
 forall a. Semigroup a => a -> a -> a
<> forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Word32
n Builder
r -> Word32 -> Builder
BS.word32BE Word32
n forall a. Semigroup a => a -> a -> a
<> Builder
r) forall a. Monoid a => a
mempty (forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
A.elems UArray Word32 Word32
arr)

serialiseSize :: IntTrie k v -> Int
serialiseSize :: forall k v. IntTrie k v -> Int
serialiseSize (IntTrie UArray Word32 Word32
arr) =
    let (Word32
_, Word32
ixEnd) = forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> (i, i)
A.bounds UArray Word32 Word32
arr in
    Int
4
  forall a. Num a => a -> a -> a
+ Int
4 forall a. Num a => a -> a -> a
* (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
ixEnd forall a. Num a => a -> a -> a
+ Int
1)

deserialise :: BS.ByteString -> Maybe (IntTrie k v, BS.ByteString)
deserialise :: forall k v. ByteString -> Maybe (IntTrie k v, ByteString)
deserialise ByteString
bs
  | ByteString -> Int
BS.length ByteString
bs forall a. Ord a => a -> a -> Bool
>= Int
4
  , let lenArr :: Word32
lenArr   = ByteString -> Int -> Word32
readWord32BE ByteString
bs Int
0
        lenTotal :: Int
lenTotal = Int
4 forall a. Num a => a -> a -> a
+ Int
4 forall a. Num a => a -> a -> a
* forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
lenArr
  , ByteString -> Int
BS.length ByteString
bs forall a. Ord a => a -> a -> Bool
>= Int
4 forall a. Num a => a -> a -> a
+ Int
4 forall a. Num a => a -> a -> a
* forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
lenArr
  , let !arr :: UArray Word32 Word32
arr = forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
A.array (Word32
0, Word32
lenArrforall a. Num a => a -> a -> a
-Word32
1)
                      [ (Word32
i, ByteString -> Int -> Word32
readWord32BE ByteString
bs Int
off)
                      | (Word32
i, Int
off) <- forall a b. [a] -> [b] -> [(a, b)]
zip [Word32
0..Word32
lenArrforall a. Num a => a -> a -> a
-Word32
1] [Int
4,Int
8 .. Int
lenTotal forall a. Num a => a -> a -> a
- Int
4] ]
        !bs' :: ByteString
bs' = Int -> ByteString -> ByteString
BS.drop Int
lenTotal ByteString
bs
  = forall a. a -> Maybe a
Just (forall k v. UArray Word32 Word32 -> IntTrie k v
IntTrie UArray Word32 Word32
arr, ByteString
bs')

  | Bool
otherwise
  = forall a. Maybe a
Nothing

readWord32BE :: BS.ByteString -> Int -> Word32
readWord32BE :: ByteString -> Int -> Word32
readWord32BE ByteString
bs Int
i =
    forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
i forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
iforall a. Num a => a -> a -> a
+Int
3 forall a. Ord a => a -> a -> Bool
<= ByteString -> Int
BS.length ByteString
bs forall a. Num a => a -> a -> a
- Int
1) forall a b. (a -> b) -> a -> b
$
    forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i forall a. Num a => a -> a -> a
+ Int
0)) forall a. Bits a => a -> Int -> a
`shiftL` Int
24
  forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i forall a. Num a => a -> a -> a
+ Int
1)) forall a. Bits a => a -> Int -> a
`shiftL` Int
16
  forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i forall a. Num a => a -> a -> a
+ Int
2)) forall a. Bits a => a -> Int -> a
`shiftL` Int
8
  forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i forall a. Num a => a -> a -> a
+ Int
3))


-------------------------
-- Correctness property
--

#ifdef TESTS

prop_lookup :: (Ord k, Enum k, Eq v, Enum v, Show k, Show v)
            => [([k], v)] -> Bool
prop_lookup paths =
  flip all paths $ \(key, value) ->
    case lookup trie key of
      Just (Entry value') | value' == value -> True
      Just (Entry value')   -> error $ "IntTrie: " ++ show (key, value, value')
      Nothing               -> error $ "IntTrie: didn't find " ++ show key
      Just (Completions xs) -> error $ "IntTrie: " ++ show xs

  where
    trie = construct paths

prop_completions :: forall k v. (Ord k, Enum k, Eq v, Enum v) => [([k], v)] -> Bool
prop_completions paths =
    inserts paths empty 
 == convertCompletions (completionsFrom (construct paths) 0)
  where
    convertCompletions :: Ord k => Completions k v -> IntTrieBuilder k v
    convertCompletions kls =
      IntTrieBuilder $
        IntMap.fromList
          [ case l of
              Entry v          -> mkleaf k v
              Completions kls' -> mknode k (convertCompletions kls')
          | (k, l) <- sortBy (compare `on` fst) kls ]


prop_lookup_mono :: ValidPaths -> Bool
prop_lookup_mono (ValidPaths paths) = prop_lookup paths

prop_completions_mono :: ValidPaths -> Bool
prop_completions_mono (ValidPaths paths) = prop_completions paths

prop_construct_toList :: ValidPaths -> Bool
prop_construct_toList (ValidPaths paths) =
       sortBy (compare `on` fst) (toList (construct paths))
    == sortBy (compare `on` fst) paths

prop_finalise_unfinalise :: ValidPaths -> Bool
prop_finalise_unfinalise (ValidPaths paths) =
    builder == unfinalise (finalise builder)
  where
    builder :: IntTrieBuilder Char Char
    builder = inserts paths empty

prop_serialise_deserialise :: ValidPaths -> Bool
prop_serialise_deserialise (ValidPaths paths) =
    Just (trie, BS.empty) == (deserialise
                            . toStrict . BS.toLazyByteString
                            . serialise) trie
  where
    trie :: IntTrie Char Char
    trie = construct paths

prop_serialiseSize :: ValidPaths -> Bool
prop_serialiseSize (ValidPaths paths) =
    (fromIntegral . LBS.length . BS.toLazyByteString . serialise) trie
 == serialiseSize trie
  where
    trie :: IntTrie Char Char
    trie = construct paths

newtype ValidPaths = ValidPaths [([Char], Char)] deriving Show

instance Arbitrary ValidPaths where
  arbitrary =
      ValidPaths . makeNoPrefix <$> listOf ((,) <$> listOf1 arbitrary <*> arbitrary)
    where
      makeNoPrefix [] = []
      makeNoPrefix ((k,v):kvs)
        | all (\(k', _) -> not (isPrefixOfOther k k')) kvs
                     = (k,v) : makeNoPrefix kvs
        | otherwise  =         makeNoPrefix kvs

  shrink (ValidPaths kvs) =
      map ValidPaths . filter noPrefix . filter nonEmpty . shrink $ kvs
    where
      noPrefix []           = True
      noPrefix ((k,_):kvs') = all (\(k', _) -> not (isPrefixOfOther k k')) kvs'
                           && noPrefix kvs'
      nonEmpty = all (not . null . fst)

isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a

toStrict :: LBS.ByteString -> BS.ByteString
#if MIN_VERSION_bytestring(0,10,0)
toStrict = LBS.toStrict
#else
toStrict = BS.concat . LBS.toChunks
#endif

#endif

#if !(MIN_VERSION_base(4,5,0))
(<>) :: Monoid m => m -> m -> m
(<>) = mappend
#endif