commit
d4488c5096
5 changed files with 114 additions and 0 deletions
@ -0,0 +1,22 @@
|
||||
# Compiler and linker settings for the ICU-based C benchmark.
# The ICU flags are queried with the documented double-dash option spelling
# (--cflags / --libs) rather than the single-dash form.
CFLAGS=-O2 -Wall $(shell pkgconf --cflags icu-io)
LDFLAGS=$(shell pkgconf --libs icu-io)
CC=gcc
|
||||
|
||||
# Build both benchmark executables, then run the comparison.
.PHONY: all
all: haskell-read-utf8 icu-read-utf8 bench
||||
|
||||
# Time both executables against each other with hyperfine.
# Declared phony so that a stray file named `bench` can never make the
# target look up to date.
bench: haskell-read-utf8 icu-read-utf8
	hyperfine ./haskell-read-utf8 ./icu-read-utf8
.PHONY: bench
||||
|
||||
# Compile the Haskell benchmark with GHC optimisations (-O) enabled.
# $@ is the target name and $< is the Haskell source listed as prerequisite.
haskell-read-utf8: ./haskell/HaskellReadUTF8.hs
	ghc -O -o ./$@ $<
|
||||
|
||||
# Build the ICU benchmark.
# The library flags come after the source file: the linker processes its
# inputs left to right, so libraries named before the code that needs them
# can be skipped (for example when linking with --as-needed).
icu-read-utf8: ./icu/icu-read-utf8.c
	$(CC) $(CFLAGS) ./icu/icu-read-utf8.c -o ./icu-read-utf8 $(LDFLAGS)
|
||||
|
||||
# Remove build products.
# Brace expansion ({o,hi}) is a bash feature and make runs recipes with
# /bin/sh by default, so each extension is spelled out explicitly.
clean:
	rm -f ./haskell/*.o ./haskell/*.hi
	rm -f ./haskell-read-utf8
	rm -f ./icu-read-utf8
.PHONY: clean
@ -0,0 +1,24 @@
|
||||
# Nix expression providing the toolchain for the UTF-8 benchmarks.
{ pkgs ? import <nixpkgs> {} }:

let
  # GHC bundled with the `text` package.
  ghcClosure =
    pkgs.haskellPackages.ghcWithPackages
      (p: [
        p.text
      ]);

in pkgs.stdenv.mkDerivation {
  pname = "bench-my-utf8";
  version = "0.0.1";

  # Install both benchmark executables, not only the Haskell one.
  installPhase = ''
    mkdir -p $out/bin
    mv haskell-read-utf8 icu-read-utf8 $out/bin
  '';

  # Tools that are executed while building or benchmarking.
  nativeBuildInputs = [
    pkgs.gnumake
    ghcClosure
    pkgs.gcc
    pkgs.hyperfine
    pkgs.pkgconf
  ];

  # Libraries the produced binaries link against.
  buildInputs = [
    pkgs.icu
  ];
}
@ -0,0 +1,7 @@
|
||||
module Main where |
||||
|
||||
import qualified Data.Text.IO as TIO |
||||
import qualified Data.Text as T |
||||
|
||||
-- | Read the English corpus through "Data.Text.IO" and discard the
-- resulting text; the program produces no output of its own.
main :: IO ()
main = do
  _ <- TIO.readFile "text-test-data/english.txt"
  pure ()
@ -0,0 +1,11 @@
|
||||
#include <stdio.h>
#include <stdlib.h>

#include <unicode/umachine.h>
#include <unicode/ustdio.h>
||||
|
||||
int main(void) |
||||
{ |
||||
UFILE *in = u_fopen("text-test-data/english.txt", "r", NULL, "UTF-8"); |
||||
// Using a 10 MB buffer for now. It fits all the corpus I want to test agaisnt.
|
||||
UChar *charBuff = malloc(10000000 * sizeof(UChar)); |
||||
int32_t i = u_file_read(charBuff, 10000000, in); |
||||
} |
@ -0,0 +1,50 @@
|
||||
# UTF-8 Benchmark |
||||
|
||||
The idea is to benchmark several UTF-8-related aspects of the Haskell `text` package, namely UTF-8 encoding/decoding and various Unicode casing operations. Hopefully, we'll find ways to improve `text`'s performance.
||||
|
||||
For the time being, we're comparing the `text` implementation with the ICU one (written in C). In the future, I also plan to test it against C++ Boost and the Rust standard library.
||||
|
||||
# Implemented so far |
||||
|
||||
- [x] UTF-8 decoding from file. |
||||
- [x] English |
||||
- [ ] Chinese |
||||
- [ ] French |
||||
- [ ] Russian |
||||
- [ ] UTF-8 encoding to file. |
||||
- [ ] English |
||||
- [ ] Chinese |
||||
- [ ] French |
||||
- [ ] Russian |
||||
- [ ] UTF-8 encoding to file. |
||||
- [ ] English |
||||
- [ ] Chinese |
||||
- [ ] French |
||||
- [ ] Russian |
||||
|
||||
|
||||
# Usage |
||||
|
||||
```bash |
||||
nix-shell |
||||
make all |
||||
``` |
||||
|
||||
# Findings |
||||
|
||||
## UTF-8 Decoding |
||||
|
||||
``` |
||||
hyperfine ./haskell-read-utf8 ./icu-read-utf8 |
||||
Benchmark #1: ./haskell-read-utf8 |
||||
Time (mean ± σ): 23.3 ms ± 0.9 ms [User: 14.9 ms, System: 8.3 ms] |
||||
Range (min … max): 22.0 ms … 26.1 ms 111 runs |
||||
|
||||
Benchmark #2: ./icu-read-utf8 |
||||
Time (mean ± σ): 12.5 ms ± 0.8 ms [User: 7.6 ms, System: 4.9 ms] |
||||
Range (min … max): 11.5 ms … 16.1 ms 176 runs |
||||
|
||||
Summary |
||||
'./icu-read-utf8' ran |
||||
1.85 ± 0.14 times faster than './haskell-read-utf8' |
||||
``` |
Loading…
Reference in new issue