forked from mongodb-haskell/bson
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Tony Hannan
committed
May 14, 2010
0 parents
commit 74e5265
Showing
5 changed files
with
339 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
-- | A BSON document is a JSON-like object with a standard binary encoding defined at bsonspec.org. This implements version 1.0 of that spec. | ||
-- Use the GHC language extension "OverloadedStrings" to automatically convert String literals to UString (UTF8) | ||
|
||
{-# LANGUAGE OverloadedStrings, TypeSynonymInstances #-} | ||
|
||
module Data.Bson ( | ||
UString, | ||
Document, Element(..), Value(..), | ||
label, value, | ||
lookup | ||
) where | ||
|
||
import Prelude hiding (lookup) | ||
import Data.Bson.ObjectId (ObjectId) | ||
import Data.Int | ||
import qualified Data.CompactString.UTF8 as U | ||
import Data.Time.Clock (UTCTime) | ||
import Data.Time.Format () -- for Show and Read instances of UTCTime | ||
import Data.ByteString.Char8 (ByteString) | ||
import Data.List (find) | ||
import Text.Read (Read(..)) | ||
import Control.Applicative ((<$>), (<*>)) | ||
|
||
instance Read U.CompactString where | ||
readPrec = U.pack <$> readPrec | ||
|
||
type UString = U.CompactString | ||
-- ^ UTF-8 String | ||
|
||
-- * Document structure | ||
|
||
type Document = [Element] | ||
|
||
data Element = Label := Value deriving (Show, Read, Eq) | ||
|
||
type Label = UString | ||
|
||
data Value = | ||
Float Double | ||
| String UString | ||
| Document Document | ||
| Array [Value] | ||
| Function ByteString | ||
| Binary ByteString | ||
| UUID ByteString | ||
| MD5 ByteString | ||
| UserDefined ByteString | ||
| ObjectId ObjectId | ||
| Bool Bool | ||
| UTC UTCTime | ||
| Null | ||
| Regex UString UString | ||
| JavascriptCode UString | ||
| Symbol UString | ||
| JavascriptClosure UString Document | ||
| Int32 Int32 | ||
| MongoStamp Int64 | ||
| Int64 Int64 | ||
| MinKey | ||
| MaxKey | ||
deriving (Show, Read, Eq) | ||
|
||
-- ** Document access | ||
|
||
lookup :: Label -> Document -> Maybe Value | ||
-- ^ Lookup value of named element in document | ||
lookup k o = value <$> find ((k ==) . label) o | ||
|
||
label :: Element -> Label | ||
label (k := _) = k | ||
|
||
value :: Element -> Value | ||
value (_ := v) = v |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
-- | Standard binary encoding of BSON documents, version 1.0. See bsonspec.org | ||
|
||
module Data.Bson.Binary ( | ||
encode, decode | ||
) where | ||
|
||
import Prelude hiding (length, concat) | ||
import Data.Bson | ||
import Data.Bson.ObjectId | ||
import Data.Int | ||
import Data.Word | ||
import Data.Binary.Put | ||
import Data.Binary.Get | ||
import Data.Binary.IEEE754 | ||
import Data.ByteString.Char8 (ByteString, pack, length, concat) | ||
import qualified Data.ByteString.Lazy.Char8 as L (ByteString, toChunks, length) | ||
import qualified Data.CompactString.UTF8 as U | ||
import Data.Time.Clock (UTCTime) | ||
import Data.Time.Clock.POSIX | ||
import Control.Applicative ((<$>), (<*>)) | ||
|
||
encode :: Document -> L.ByteString | ||
-- ^ Binary representation of document | ||
encode doc = runPut (putDocument doc) | ||
|
||
decode :: L.ByteString -> Document | ||
-- ^ Haskell representation of binary BSON document | ||
decode bytes = runGet getDocument bytes | ||
|
||
putElement :: Element -> Put | ||
-- ^ Write binary representation of element | ||
putElement (k := v) = case v of | ||
Float x -> putTL 0x01 >> putDouble x | ||
String x -> putTL 0x02 >> putString x | ||
Document x -> putTL 0x03 >> putDocument x | ||
Array x -> putTL 0x04 >> putArray x | ||
Function x -> putTL 0x05 >> putBinary 0x01 x | ||
Binary x -> putTL 0x05 >> putBinary 0x02 x | ||
UUID x -> putTL 0x05 >> putBinary 0x03 x | ||
MD5 x -> putTL 0x05 >> putBinary 0x05 x | ||
UserDefined x -> putTL 0x05 >> putBinary 0x80 x | ||
ObjectId x -> putTL 0x07 >> putObjectId x | ||
Bool x -> putTL 0x08 >> putBool x | ||
UTC x -> putTL 0x09 >> putUTC x | ||
Null -> putTL 0x0A | ||
Regex x y -> putTL 0x0B >> putCString x >> putCString y | ||
JavascriptCode x -> putTL 0x0D >> putString x | ||
Symbol x -> putTL 0x0E >> putString x | ||
JavascriptClosure x y -> putTL 0x0F >> putClosure x y | ||
Int32 x -> putTL 0x10 >> putInt32 x | ||
MongoStamp x -> putTL 0x11 >> putInt64 x | ||
Int64 x -> putTL 0x12 >> putInt64 x | ||
MinKey -> putTL 0xFF | ||
MaxKey -> putTL 0x7F | ||
where | ||
putTL t = putTag t >> putLabel k | ||
|
||
getElement :: Get Element | ||
-- ^ Read binary representation of Element | ||
getElement = do | ||
t <- getTag | ||
k <- getLabel | ||
v <- case t of | ||
0x01 -> Float <$> getDouble | ||
0x02 -> String <$> getString | ||
0x03 -> Document <$> getDocument | ||
0x04 -> Array <$> getArray | ||
0x05 -> getBinary >>= \(s, x) -> case s of | ||
0x01 -> return (Function x) | ||
0x02 -> return (Binary x) | ||
0x03 -> return (UUID x) | ||
0x05 -> return (MD5 x) | ||
0x80 -> return (UserDefined x) | ||
_ -> fail $ "unknown Bson binary subtype " ++ show s | ||
0x07 -> ObjectId <$> getObjectId | ||
0x08 -> Bool <$> getBool | ||
0x09 -> UTC <$> getUTC | ||
0x0A -> return Null | ||
0x0B -> Regex <$> getCString <*> getCString | ||
0x0D -> JavascriptCode <$> getString | ||
0x0E -> Symbol <$> getString | ||
0x0F -> uncurry JavascriptClosure <$> getClosure | ||
0x10 -> Int32 <$> getInt32 | ||
0x11 -> MongoStamp <$> getInt64 | ||
0x12 -> Int64 <$> getInt64 | ||
0xFF -> return MinKey | ||
0x7F -> return MaxKey | ||
_ -> fail $ "unknown Bson element type " ++ show t | ||
return (k := v) | ||
|
||
putTag = putWord8 | ||
getTag = getWord8 | ||
|
||
putLabel = putCString | ||
getLabel = getCString | ||
|
||
putDouble = putFloat64le | ||
getDouble = getFloat64le | ||
|
||
putInt32 :: Int32 -> Put | ||
putInt32 = putWord32le . fromIntegral | ||
|
||
getInt32 :: Get Int32 | ||
getInt32 = fromIntegral <$> getWord32le | ||
|
||
putInt64 :: Int64 -> Put | ||
putInt64 = putWord64le . fromIntegral | ||
|
||
getInt64 :: Get Int64 | ||
getInt64 = fromIntegral <$> getWord64le | ||
|
||
putCString :: UString -> Put | ||
putCString x = do | ||
putByteString (U.toByteString x) | ||
putWord8 0 | ||
|
||
getCString :: Get UString | ||
getCString = U.fromByteString_ . concat . L.toChunks <$> getLazyByteStringNul | ||
|
||
putString :: UString -> Put | ||
putString x = let b = U.toByteString x in do | ||
putInt32 $ toEnum (length b + 1) | ||
putByteString b | ||
putWord8 0 | ||
|
||
getString :: Get UString | ||
getString = do | ||
len <- subtract 1 <$> getInt32 | ||
b <- getByteString (fromIntegral len) | ||
getWord8 | ||
return (U.fromByteString_ b) | ||
|
||
putDocument :: Document -> Put | ||
putDocument es = let b = runPut (mapM_ putElement es) in do | ||
putInt32 $ (toEnum . fromEnum) (L.length b + 1) | ||
putLazyByteString b | ||
putWord8 0 | ||
|
||
getDocument :: Get Document | ||
getDocument = do | ||
len <- subtract 1 <$> getInt32 | ||
b <- getLazyByteString (fromIntegral len) | ||
getWord8 | ||
return (runGet getElements b) | ||
where | ||
getElements = isEmpty >>= \done -> if done | ||
then return [] | ||
else (:) <$> getElement <*> getElements | ||
|
||
putArray :: [Value] -> Put | ||
putArray vs = putDocument (zipWith f [0..] vs) | ||
where f i v = U.pack (show i) := v | ||
|
||
getArray :: Get [Value] | ||
getArray = map value <$> getDocument | ||
|
||
type Subtype = Word8 | ||
|
||
putBinary :: Subtype -> ByteString -> Put | ||
putBinary t x = do | ||
putInt32 $ toEnum (length x) | ||
putTag t | ||
putByteString x | ||
|
||
getBinary :: Get (Subtype, ByteString) | ||
getBinary = do | ||
len <- getInt32 | ||
t <- getTag | ||
x <- getByteString (fromIntegral len) | ||
return (t, x) | ||
|
||
putObjectId :: ObjectId -> Put | ||
putObjectId (Oid x y) = putWord32be x >> putWord64be y | ||
|
||
getObjectId :: Get ObjectId | ||
getObjectId = Oid <$> getWord32be <*> getWord64be | ||
|
||
putBool :: Bool -> Put | ||
putBool x = putWord8 (if x then 1 else 0) | ||
|
||
getBool :: Get Bool | ||
getBool = (> 0) <$> getWord8 | ||
|
||
putUTC :: UTCTime -> Put | ||
-- store milliseconds since Unix epoch | ||
putUTC x = putInt64 $ truncate (utcTimeToPOSIXSeconds x * 1000) | ||
|
||
getUTC :: Get UTCTime | ||
-- stored as millisecinds since Unix epoch | ||
getUTC = posixSecondsToUTCTime . (/ 1000) . realToFrac <$> getInt64 | ||
|
||
putClosure :: UString -> Document -> Put | ||
putClosure x y = let b = runPut (putString x >> putDocument y) in do | ||
putInt32 $ (toEnum . fromEnum) (L.length b) | ||
putLazyByteString b | ||
|
||
getClosure :: Get (UString, Document) | ||
getClosure = do | ||
getInt32 | ||
x <- getString | ||
y <- getDocument | ||
return (x, y) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
|
||
module Data.Bson.ObjectId ( | ||
ObjectId(..), genObjectId, | ||
timestamp | ||
) where | ||
|
||
import Control.Applicative ((<$>)) | ||
import Data.Word | ||
import Data.Bits (shift, (.|.)) | ||
import Data.ByteString.Char8 (pack) | ||
import Data.Digest.OpenSSL.MD5 (md5sum) | ||
import Numeric (readHex) | ||
import Network.BSD (getHostName) | ||
import System.Posix.Process (getProcessID) | ||
import Data.Time.Clock (UTCTime) | ||
import Data.Time.Clock.POSIX (getPOSIXTime, posixSecondsToUTCTime) | ||
import System.IO.Unsafe (unsafePerformIO) | ||
import Data.IORef | ||
|
||
data ObjectId = Oid Word32 Word64 deriving (Show, Read, Eq, Ord) | ||
-- ^ A BSON ObjectID is a 12-byte value consisting of a 4-byte timestamp (seconds since epoch), a 3-byte machine id, a 2-byte process id, and a 3-byte counter. Note that the timestamp and counter fields must be stored big endian unlike the rest of BSON. This is because they are compared byte-by-byte and we want to ensure a mostly increasing order. | ||
|
||
timestamp :: ObjectId -> UTCTime | ||
timestamp (Oid time _) = posixSecondsToUTCTime (fromIntegral time) | ||
|
||
genObjectId :: IO ObjectId | ||
-- ^ Create a fresh ObjectId | ||
genObjectId = do | ||
time <- truncate <$> getPOSIXTime | ||
pid <- fromIntegral <$> getProcessID | ||
inc <- nextCount | ||
return $ Oid time (composite machineId pid inc) | ||
where | ||
machineId :: Word24 | ||
machineId = unsafePerformIO (fst . head . readHex . take 6 . md5sum . pack <$> getHostName) | ||
{-# NOINLINE machineId #-} | ||
counter :: IORef Word24 | ||
counter = unsafePerformIO (newIORef 0) | ||
{-# NOINLINE counter #-} | ||
nextCount :: IO Word24 | ||
nextCount = atomicModifyIORef counter $ \n -> (wrap24 (n + 1), n) | ||
composite :: Word24 -> Word16 -> Word24 -> Word64 | ||
composite mid pid inc = fromIntegral mid `shift` 40 .|. fromIntegral pid `shift` 24 .|. fromIntegral inc | ||
|
||
type Word24 = Word32 | ||
-- ^ low 3 bytes only, high byte must be zero | ||
|
||
wrap24 :: Word24 -> Word24 | ||
wrap24 n = n `mod` 0x1000000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#! /usr/bin/env runhaskell | ||
|
||
> import Distribution.Simple | ||
> main = defaultMain |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
Name: bson | ||
Version: 0.0.0 | ||
Synopsis: BSON documents are JSON-like objects with a standard binary encoding defined at bsonspec.org. This implements version 1.0 of that spec. | ||
Description: | ||
Category: Data | ||
License: BSD | ||
Author: Tony Hannan | ||
Maintainer: Tony Hannan <[email protected]> | ||
Build-Depends: base, time, bytestring, unix, network, nano-md5, binary, data-binary-ieee754, compact-string | ||
Build-Type: Simple | ||
Exposed-modules: Data.Bson, Data.Bson.ObjectId, Data.Bson.Binary |