www.idziorek.net | blog | contact
February 2018

Miguel’s Base64 Encoder

Initially coded on a cold winter afternoon to fully understand base64 encoding and play with Haskell, which is always an indisputable pleasure. Hacked together in big anger, due to my friend Nick’s fairy tales about saving his encrypted binary data in plain ASCII configuration files,… featuring strange letters and non-printable characters. 😄

After optimizing quite a bit, the encoder performs around 460MB/s on a single core of my i7-4790K.

The source code below was auto-fetched from: https://gitweb.softwarefools.com/?p=miguel/haskell.git;a=blob;f=base64/base64.hs

import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString as BS
import Data.Word (Word8, Word32)
import Data.Array.Unboxed(UArray,array)
import Data.Array.Base(unsafeAt)
import Data.Bits(shiftL,shiftR,(.&.))
import Foreign.Ptr (plusPtr)
import Foreign.Storable (peek, poke)
import Foreign.ForeignPtr (withForeignPtr)
import System.IO.Unsafe (unsafePerformIO)

-- |Perform base64 encoding of data from standard input
main :: IO()
main = BL.getContents>>=BL.putStr.flip BL.append (BL.pack [BI.c2w '\n']).
                        BL.fromChunks.map encode64.reChunk.BL.toChunks

-- |Base64 index table 
tab64 :: UArray Word32 Word8
tab64 = array (0,63) $ zip [0..] $ map (BI.c2w) $ 
        ['A'..'Z']++['a'..'z']++['0'..'9']++['+','/']

-- |Encodes 3 octets as 4 sextets. This is what base64 basically :) is about.
enc64 :: (Word8,Word8,Word8)->(Word8,Word8,Word8,Word8)
enc64 (b1,b2,b3) = (t 3,t 2,t 1,t 0)
    where t x   = tab64 `unsafeAt` (n `shiftR` (x*6) .&. 63)
          f b n = fromIntegral b `shiftL` n
          n     = f b1 16 + f b2 8 + f b3 0

-- |Transforms list of ByteStrings to a new list of ByteStrings with 
-- lengths guaranteed to be multiples of 3 (excepting the last one)
-- Assumes that all input ByteStrings (excepting the last one) have 
-- at least a length of 3.
reChunk :: [BS.ByteString] -> [BS.ByteString]
reChunk (y:[]) = [y]
reChunk (y:z:zs) = let c = BS.length y `mod` 3 in 
                   BS.append y (BS.take 3 z):(reChunk $ (BS.drop 3 z):zs)

-- |Wraps Base64 enode in encode64io in unsafePerformIO to use in 
-- pure code. Use this only if you trust my 'encode64io' code is free 
-- of side effects and indepedent of the environment. Good Luck!
encode64 :: BS.ByteString -> BS.ByteString
encode64 = unsafePerformIO . encode64io

-- |Base64 encode a strict ByteString using foreign pointers within the 
-- IO monad.
encode64io :: BS.ByteString -> IO BS.ByteString
encode64io (BI.PS ptr offs len) = do 
    bs <- BI.mallocByteString ln
    withForeignPtr ptr $ \fp -> do 
        withForeignPtr bs $ \fbs -> do 
            let end = fp `plusPtr` (len+offs) 
                in go (fp`plusPtr`offs) fbs end bs
    where ln=((len + 2) `div` 3) * 4
          go fp fbs end bs
            | fp >= end = return $ BI.fromForeignPtr bs 0 ln
            | fp `plusPtr`1 == end = cnv 1
            | fp `plusPtr`2 == end = cnv 2
            | otherwise = cnv 3
            where pok nn n v  = if n>1 && nn<n 
                                    then poke (fbs`plusPtr`n) (61::Word8) 
                                    else poke (fbs`plusPtr`n) v
                  pek nn n = if nn<n then return 0 else peek (fp`plusPtr`n)
                  cnv :: Int -> IO BS.ByteString
                  cnv n = pek n 0>>= \b1->pek n 1>>= \b2->pek n 2>>= \b3->
                             let (e1,e2,e3,e4) = enc64(b1,b2,b3) in
                             pok n 0 e1>>pok n 1 e2>>pok n 2 e3>>pok n 3 e4 >>
                                 go (fp `plusPtr` 3) (fbs`plusPtr`4) end bs