Skip to content

Commit

Permalink
initial
Browse files Browse the repository at this point in the history
  • Loading branch information
metagn committed Feb 18, 2022
0 parents commit 2c1b439
Show file tree
Hide file tree
Showing 6 changed files with 261 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.exe
*.dll
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# shorteststring

Word-size strings: at most 4 characters on 32-bit platforms and 8 characters on 64-bit platforms.
32 changes: 32 additions & 0 deletions shorteststring.nimble
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Package

version = "0.1.0"
author = "metagn"
description = "word size strings"
license = "MIT"
srcDir = "src"


# Dependencies

requires "nim >= 1.0.0"

# nimbleutils is optional: it is imported only when it can be resolved, and
# the tasks below check `declared(...)` before using anything from it.
when (NimMajor, NimMinor) >= (1, 4):
  when (compiles do: import nimbleutils):
    import nimbleutils

task docs, "build docs for all modules":
  # `buildDocs` is only declared when nimbleutils was imported above.
  when not declared(buildDocs):
    echo "docs task not implemented, need nimbleutils"
  else:
    buildDocs(gitUrl = "https://github.com/metagn/shorteststring")

task tests, "run tests for multiple backends":
  # `runTests` is only declared when nimbleutils was imported above.
  when not declared(runTests):
    echo "tests task not implemented, need nimbleutils"
  else:
    # One run per compiler option string.
    runTests(optionCombos = @["", "--gc:orc", "--gc:orc -d:useMalloc"])
203 changes: 203 additions & 0 deletions src/shorteststring.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
type
  ShortestString* = distinct uint
    ## A string packed into a single machine word, one character per byte.

proc `==`*(a, b: ShortestString): bool {.borrow.}
  ## Equality of the underlying words.

proc `<`*(a, b: ShortestString): bool {.borrow.}
  ## Compares the underlying words as unsigned integers. The first character
  ## occupies the lowest byte, so later characters weigh more in this order.

const
  shortestStringMaxSize* = sizeof(ShortestString) div sizeof(char)
    ## Maximum number of characters a `ShortestString` can hold.
  charBits = sizeof(char) * 8
    # Width of one character slot in bits.

template get(ss: ShortestString, i: int): char =
  # Unchecked read of the character stored in slot `i` (slot 0 is the
  # lowest byte of the word): shift the slot down, then keep one byte.
  char((uint(ss) shr (i * charBits)) and uint(high(char)))

template set(ss: var ShortestString, i: int, c: char) =
  # Unchecked write of `c` into slot `i` (slot 0 is the lowest byte).
  # The slot is cleared before the new character is merged in; a plain
  # `or` would combine the bits of the old and new characters whenever the
  # slot was already occupied.
  let shift = i * charBits
  ss = ShortestString(
    (ss.uint and not (high(char).uint shl shift)) or
    (c.uint shl shift))

proc `[]`*(ss: ShortestString, i: int): char {.inline.} =
  ## Returns the character in slot `i`. Slots past the end of the text hold
  ## `'\0'`. `i` must lie in `0 ..< shortestStringMaxSize`; this is
  ## verified only when range checks are enabled.
  rangeCheck(i in 0 ..< shortestStringMaxSize)
  result = get(ss, i)

proc `[]=`*(ss: var ShortestString, i: int, c: char) {.inline.} =
  ## Stores `c` into slot `i` via the `set` helper. `i` must lie in
  ## `0 ..< shortestStringMaxSize`; this is verified only when range checks
  ## are enabled.
  rangeCheck(i in 0 ..< shortestStringMaxSize)
  set(ss, i, c)

proc len*(ss: ShortestString): int =
  ## Number of characters stored in `ss`: the position just past the highest
  ## non-zero byte of the word, and 0 for the empty string.
  ##
  ## The previous threshold chain reported 1 for the empty string (a zero
  ## word satisfies `<= 0xFF`) and spelled out 64-bit literals; counting
  ## bytes works for every word size.
  var rest = ss.uint
  while rest != 0:
    inc result
    rest = rest shr charBits

template `[]`*(ss: ShortestString, i: BackwardsIndex): char =
  ## Reads a character counted from the end: `ss[^1]` is the slot at
  ## `ss.len - 1`.
  ss[len(ss) - int(i)]

template `[]=`*(ss: var ShortestString, i: BackwardsIndex, c: char) =
  ## Writes a character counted from the end: `ss[^1] = c` targets the slot
  ## at `ss.len - 1`.
  ss[len(ss) - int(i)] = c

proc `[]`*(ss: ShortestString, sl: Slice[int]): ShortestString {.inline.} =
  ## Returns the characters in slots `sl.a .. sl.b` as a new
  ## `ShortestString`, moved down so the result starts at slot 0. A slice
  ## with `sl.b < sl.a` yields the empty string. Both bounds must lie in
  ## `0 ..< shortestStringMaxSize`; this is verified only when range checks
  ## are enabled.
  rangeCheck sl.a >= 0 and sl.a < shortestStringMaxSize and
    sl.b >= 0 and sl.b < shortestStringMaxSize
  let count = sl.b - sl.a + 1
  if count > 0:
    # Slot 0 is the lowest byte, so dropping the first `sl.a` characters is a
    # right shift. The shl/shr pair then clears every slot above `count`.
    let unused = (shortestStringMaxSize - count) * charBits
    result = ShortestString(
      ((ss.uint shr (sl.a * charBits)) shl unused) shr unused)

proc `[]=`*(ss: var ShortestString, sl: Slice[int], ss2: ShortestString) {.inline.} =
  ## Copies the leading characters of `ss2` into slots `sl.a .. sl.b` of
  ## `ss`, one slot at a time through the single-character `[]=`. Both
  ## bounds must lie in `0 ..< shortestStringMaxSize`; this is verified only
  ## when range checks are enabled.
  rangeCheck((sl.a in 0 ..< shortestStringMaxSize) and
    (sl.b in 0 ..< shortestStringMaxSize))
  for offset in 0 .. sl.b - sl.a:
    ss[sl.a + offset] = ss2[offset]

iterator items*(ss: ShortestString): char =
  ## Yields the characters of `ss` from slot 0 upwards, stopping at the
  ## first `'\0'` slot or after `shortestStringMaxSize` characters.
  # Kept as a loop rather than unrolled: an iterator cannot `return`.
  {.push rangeChecks: off.}
  for slot in 0 ..< shortestStringMaxSize:
    let ch = get(ss, slot)
    if ch == char(0):
      break
    yield ch
  {.pop.}

# Byte-order helpers for the native backends. `swapEndian` maps to the C
# compiler's byte-swap intrinsic for the current word size, when one is
# known. `toLittleEndian` returns a word whose lowest byte is the first byte
# in memory.
when not defined(js) and not defined(nimscript):
  when defined(gcc) or defined(llvm_gcc) or defined(clang):
    when shortestStringMaxSize == 2:
      proc swapEndian(a: uint): uint {.
        importc: "__builtin_bswap16", nodecl, noSideEffect.}
    elif shortestStringMaxSize == 4:
      proc swapEndian(a: uint): uint {.
        importc: "__builtin_bswap32", nodecl, noSideEffect.}
    elif shortestStringMaxSize == 8:
      proc swapEndian(a: uint): uint {.
        importc: "__builtin_bswap64", nodecl, noSideEffect.}
  elif defined(icc):
    when shortestStringMaxSize == 2:
      proc swapEndian(a: uint): uint {.
        importc: "_bswap16", nodecl, noSideEffect.}
    elif shortestStringMaxSize == 4:
      proc swapEndian(a: uint): uint {.
        importc: "_bswap", nodecl, noSideEffect.}
    elif shortestStringMaxSize == 8:
      proc swapEndian(a: uint): uint {.
        importc: "_bswap64", nodecl, noSideEffect.}
  elif defined(vcc):
    when shortestStringMaxSize == 2:
      # The MSVC intrinsic is `_byteswap_ushort`; the previous spelling
      # `_byteswap_ushortest` names no existing function.
      proc swapEndian(a: uint): uint {.
        importc: "_byteswap_ushort", nodecl, header: "<intrin.h>", noSideEffect.}
    elif shortestStringMaxSize == 4:
      proc swapEndian(a: uint): uint {.
        importc: "_byteswap_ulong", nodecl, header: "<intrin.h>", noSideEffect.}
    elif shortestStringMaxSize == 8:
      proc swapEndian(a: uint): uint {.
        importc: "_byteswap_uint64", nodecl, header: "<intrin.h>", noSideEffect.}
  when cpuEndian == littleEndian:
    # No swap is needed here, so the helper exists even when the C compiler
    # is not one of those listed above.
    template toLittleEndian(x: uint): uint =
      x
  elif declared(swapEndian):
    template toLittleEndian(x: uint): uint =
      swapEndian(x)

proc `$`*(ss: ShortestString): string =
  ## Converts `ss` to a regular `string` containing the same characters.
  ##
  ## At compile time, on the JS and NimScript backends, and wherever no
  ## `toLittleEndian` helper is available, characters are appended one at a
  ## time. Otherwise the word is copied into the string in one go.
  when nimvm:
    result = newStringOfCap(sizeof(ShortestString))
    for c in ss.items:
      result.add(c)
  else:
    when defined(js) or defined(nimscript) or not declared(toLittleEndian):
      result = newStringOfCap(sizeof(ShortestString))
      for c in ss.items:
        result.add(c)
    else:
      # Faster than adding one character at a time, but the length still has
      # to be computed first.
      if ss.uint != 0:
        var word = toLittleEndian(ss.uint)
        result = newString(ss.len)
        # Copy only `result.len` bytes: storing the whole word through a
        # `ptr uint` would write past the end of a shorter string buffer.
        copyMem(addr result[0], addr word, result.len)

iterator mitems*(ss: var ShortestString): var char =
  ## Yields each character of `ss` as a mutable location, from slot 0
  ## upwards, stopping at the first `'\0'` slot. Whatever the loop body
  ## assigns replaces the character in that slot once the body finishes.
  {.push rangeChecks: off.}
  var i = 0
  while i < shortestStringMaxSize:
    # The characters live inside a word and have no address of their own, so
    # a local copy is handed out and written back afterwards.
    var c = get(ss, i)
    if c == char(0):
      break
    yield addr(c)[]
    # Clear the slot before storing the new value; or-ing it in would merge
    # the bits of the old and new characters.
    let shift = i * charBits
    ss = ShortestString(
      (ss.uint and not (high(char).uint shl shift)) or
      (c.uint shl shift))
    inc i
  {.pop.}

proc add*(ss: var ShortestString, c: char) =
  ## Appends `c` by storing it in the first slot that holds `'\0'`.
  ## If every slot is occupied, an assertion fails with a message naming
  ## the full string.
  {.push rangeChecks: off.}
  for i in 0 ..< shortestStringMaxSize:
    # Test the slot directly: binding it to a local named `c` shadowed the
    # parameter, so `'\0'` was stored and nothing was ever appended.
    if get(ss, i) == char(0):
      set(ss, i, c)
      return
  {.pop.}
  assert false, "string " & $ss & " is full"

proc toShortestString*(s: openArray[char], optimized: static bool = true): ShortestString =
  ## Packs the characters of `s` into a `ShortestString`, with `s[0]` in
  ## slot 0. `s.len` must not exceed `shortestStringMaxSize`; this is
  ## verified only when range checks are enabled.
  ##
  ## With `optimized = false`, at compile time, on the JS and NimScript
  ## backends, and wherever no `toLittleEndian` helper is available, the
  ## characters are stored one at a time. Otherwise they are copied into the
  ## word in one go.
  rangeCheck s.len <= shortestStringMaxSize
  when nimvm:
    for i, c in s:
      result[i] = c
  else:
    when defined(js) or defined(nimscript) or not optimized or
        not declared(toLittleEndian):
      for i, c in s:
        result[i] = c
    else:
      # Clamp so the copy can never overrun `word`, even with range checks
      # disabled.
      let count = min(s.len, shortestStringMaxSize)
      if count > 0:
        # Copy exactly `count` bytes into a zeroed word. Loading a whole
        # `uint` from `s` would read past the end of a shorter buffer, and
        # the zero-initialised word makes masking the unused slots
        # unnecessary.
        # XXX benchmark in general
        var word: uint
        copyMem(addr word, unsafeAddr s[0], count)
        result = ShortestString(toLittleEndian(word))

template shortest*(s: static string): ShortestString =
  ## Builds a `ShortestString` from a string known at compile time;
  ## also usable in literal form, as in `shortest"ab"`.
  s.toShortestString
1 change: 1 addition & 0 deletions tests/config.nims
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Add the package sources to the module search path for the tests.
switch "path", "$projectDir/../src"
20 changes: 20 additions & 0 deletions tests/test1.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import shorteststring

template checkRoundTrip(s: string, ss: ShortestString) =
  # The packed value must stringify back to `s` and agree with it at
  # every index.
  doAssert $ss == s
  for i in 0 ..< s.len:
    doAssert s[i] == ss[i]

block:
  # Every length from empty up to the 8-character maximum of a 64-bit word,
  # through both the default and the non-optimized conversion.
  const samples = ["", "a", "ab", "abc", "abcd", "abcde", "abcdef",
                   "abcdefg", "abcdefgh"]
  for s in samples:
    checkRoundTrip(s, s.toShortestString)
    checkRoundTrip(s, s.toShortestString(optimized = false))

  # Ordering, via the compile-time template...
  doAssert shortest"ab" < shortest"abc"
  doAssert shortest"ab" < shortest"ac"
  doAssert shortest"ab" < shortest"bb"
  # ...and via the conversion proc.
  doAssert toShortestString"ab" < toShortestString"abc"
  doAssert toShortestString"ab" < toShortestString"ac"
  doAssert toShortestString"ab" < toShortestString"bb"

0 comments on commit 2c1b439

Please sign in to comment.