From f4815eb60c2b87b9c5402bc157af9d5b3ed958a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Baylac-Jacqu=C3=A9?=
Date: Tue, 29 Jun 2021 21:02:27 +0200
Subject: [PATCH] Initial benchmark: comparing haskell's Text with icu on EN corpus

Run `make all` to run the benchmark.
---
 Makefile                   | 27 +++++++++++++++++++++++++++
 default.nix                | 25 +++++++++++++++++++++++++
 haskell/HaskellReadUTF8.hs |  8 ++++++++
 icu/icu-read-utf8.c        | 38 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+)
 create mode 100644 Makefile
 create mode 100644 default.nix
 create mode 100644 haskell/HaskellReadUTF8.hs
 create mode 100644 icu/icu-read-utf8.c

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5c98cdf
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,27 @@
+# Build both UTF-8 readers and benchmark them against each other.
+# Compiler and linker flags for ICU's I/O library come from pkgconf.
+CFLAGS=-O2 -Wall $(shell pkgconf --cflags icu-io)
+LDFLAGS=$(shell pkgconf --libs icu-io)
+CC=gcc
+
+all: haskell-read-utf8 icu-read-utf8 bench
+.PHONY: all
+
+# Time the two executables with hyperfine.
+bench: haskell-read-utf8 icu-read-utf8
+	hyperfine ./haskell-read-utf8 ./icu-read-utf8
+.PHONY: bench
+
+haskell-read-utf8: ./haskell/HaskellReadUTF8.hs
+	ghc -o ./haskell-read-utf8 -O ./haskell/HaskellReadUTF8.hs
+
+# Libraries are listed after the source file: linkers resolve left to right.
+icu-read-utf8: ./icu/icu-read-utf8.c
+	$(CC) $(CFLAGS) ./icu/icu-read-utf8.c -o ./icu-read-utf8 $(LDFLAGS)
+
+# Two explicit globs: brace expansion is not available in every /bin/sh.
+clean:
+	rm -f ./haskell/*.o ./haskell/*.hi
+	rm -f ./haskell-read-utf8
+	rm -f ./icu-read-utf8
+.PHONY: clean
diff --git a/default.nix b/default.nix
new file mode 100644
index 0000000..c882b00
--- /dev/null
+++ b/default.nix
@@ -0,0 +1,25 @@
+# Derivation whose inputs are GNU make, GHC (with text), gcc, ICU, hyperfine and pkgconf.
+{ pkgs ? import <nixpkgs> {} }:
+
+let ghcClosure =
+  pkgs.haskellPackages.ghcWithPackages
+    (p:[
+      p.text
+    ]);
+
+in pkgs.stdenv.mkDerivation {
+  pname = "bench-my-utf8";
+  version = "0.0.1";
+  installPhase = ''
+    mkdir -p $out/bin
+    mv haskell-read-utf8 $out/bin
+  '';
+  nativeBuildInputs = [
+    pkgs.gnumake
+    ghcClosure
+    pkgs.gcc
+    pkgs.icu
+    pkgs.hyperfine
+    pkgs.pkgconf
+  ];
+}
diff --git a/haskell/HaskellReadUTF8.hs b/haskell/HaskellReadUTF8.hs
new file mode 100644
index 0000000..7f45316
--- /dev/null
+++ b/haskell/HaskellReadUTF8.hs
@@ -0,0 +1,8 @@
+-- Benchmark subject: read the English corpus as Text and discard the result.
+module Main where
+
+import qualified Data.Text.IO as TIO
+import qualified Data.Text as T
+
+main :: IO ()
+main = TIO.readFile "text-test-data/english.txt" >> pure ()
diff --git a/icu/icu-read-utf8.c b/icu/icu-read-utf8.c
new file mode 100644
index 0000000..0f71069
--- /dev/null
+++ b/icu/icu-read-utf8.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unicode/ustdio.h>
+
+/* Upper bound, in UTF-16 code units, on how much of the corpus is read. */
+#define MAX_UCHARS 100000000
+
+/*
+ * Read the English corpus through ICU, decoding UTF-8 into UTF-16, then
+ * print the decoded text followed by the number of code units read.
+ */
+int main(void)
+{
+    UFILE *in = u_fopen("text-test-data/english.txt", "r", NULL, "UTF-8");
+    if (in == NULL) {
+        fprintf(stderr, "cannot open text-test-data/english.txt\n");
+        return EXIT_FAILURE;
+    }
+
+    /* One extra slot: u_file_read() does not NUL-terminate the buffer. */
+    UChar *charBuff = malloc(((size_t)MAX_UCHARS + 1) * sizeof(UChar));
+    if (charBuff == NULL) {
+        fprintf(stderr, "cannot allocate the read buffer\n");
+        u_fclose(in);
+        return EXIT_FAILURE;
+    }
+
+    int32_t i = u_file_read(charBuff, MAX_UCHARS, in);
+    charBuff[i] = 0;
+
+    /* %S takes a UChar string; %s would treat the buffer as a char string. */
+    u_printf("%S\n", charBuff);
+    u_printf("%i\n", i);
+
+    free(charBuff);
+    u_fclose(in);
+    return EXIT_SUCCESS;
+}