From 152d9d81b5254d5744d22e4c6301d07cfcf99d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Baylac-Jacqu=C3=A9?= Date: Wed, 17 Nov 2021 21:32:02 +0100 Subject: [PATCH] CSV intermediate file approach. time ./scripts/gen-fantoir.sh FANTOIR0721 real 0m13,319s user 0m9,804s sys 0m3,458s --- Cargo.lock | 140 ----------------------------------------- Cargo.toml | 3 +- scripts/gen-fantoir.sh | 30 +++++++++ src/main.rs | 29 +-------- 4 files changed, 34 insertions(+), 168 deletions(-) create mode 100755 scripts/gen-fantoir.sh diff --git a/Cargo.lock b/Cargo.lock index e6513c3..2bc6de9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,146 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "ahash" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom", - "once_cell", - "version_check", -] - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "fallible-iterator" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" - -[[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" - [[package]] name = "fast-fantoir" version = "0.1.0" -dependencies = [ - "rusqlite", -] - -[[package]] -name = "getrandom" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" -dependencies = [ - "ahash", -] - -[[package]] -name = "hashlink" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf" -dependencies = [ - "hashbrown", -] - -[[package]] -name = "libc" -version = "0.2.107" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbe5e23404da5b4f555ef85ebed98fb4083e55a00c317800bc2a50ede9f3d219" - -[[package]] -name = "libsqlite3-sys" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abd5850c449b40bacb498b2bbdfaff648b1b055630073ba8db499caf2d0ea9f2" -dependencies = [ - "pkg-config", - "vcpkg", -] - -[[package]] -name = "memchr" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" - -[[package]] -name = "once_cell" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" - -[[package]] -name = "pkg-config" -version = "0.3.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12295df4f294471248581bc09bef3c38a5e46f1e36d6a37353621a0c6c357e1f" - -[[package]] -name = "rusqlite" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a82b0b91fad72160c56bf8da7a549b25d7c31109f52cc1437eac4c0ad2550a7" -dependencies = [ - "bitflags", - "fallible-iterator", - "fallible-streaming-iterator", - "hashlink", - "libsqlite3-sys", - "memchr", - "smallvec", -] - -[[package]] -name = "smallvec" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" - -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - -[[package]] -name = "version_check" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" - -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" diff --git a/Cargo.toml b/Cargo.toml index a9fed48..c579e29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,5 +5,4 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[dependencies] -rusqlite = "*" \ No newline at end of file +[dependencies] \ No newline at end of file diff --git a/scripts/gen-fantoir.sh b/scripts/gen-fantoir.sh new file mode 100755 index 0000000..fd3b0aa --- /dev/null +++ b/scripts/gen-fantoir.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -eau -o pipefail + +if [ "$#" -ne 1 ]; then + echo "Usage: gen-fantoir.sh path-to-fantoir-data" + echo "" + echo "ERROR: Missing fantoir data" + exit 1 +fi + +# Setup TMP dirs +tmpDir=$(mktemp -d) +clean_tmp () { + rm -r "${tmpDir}" +} +trap clean_tmp EXIT +tmpCsv="${tmpDir}"/fantoir.csv +tmpSql="${tmpDir}"/import-fantoir.sql + +echo "Generating fantoir CSV" +cargo run --release -- "$1" > "${tmpCsv}" + +echo "Generating fantoir SQLite DB" +cat >"${tmpSql}" < std::io::Result<()> { let fantoir_path = std::env::args().nth(1).unwrap(); - let connection = rusqlite::Connection::open("fantoir.sqlite").unwrap(); - connection.execute( - " - CREATE TABLE IF NOT EXISTS streets ( - id INTEGER PRIMARY KEY, - insee TEXT NOT NULL, - rivoli TEXT NOT NULL, - libelle TEXT NOT NULL - ); - ", [] - ).unwrap(); - let mut insert_statement = connection - .prepare("INSERT INTO streets (insee, rivoli, libelle) values (?,?,?)") - .unwrap(); let file = match File::open(&fantoir_path) { Err(err) => panic!("Cannot read file {}: {}", fantoir_path, err), Ok(file) => file, }; let reader = BufReader::new(file); - - let mut full_insee = String::with_capacity(5); + println!("full_insee;rivoli_with_key;libelle"); for line in reader.lines() { let l = line.unwrap(); if l.chars().nth(3) == Some(' ') { @@ -32,18 +16,11 @@ fn main() -> std::io::Result<()> { } else if l.chars().nth(6) == Some(' ') { // Enregistrement Commune } else { - full_insee = String::from(l.get(0..2).unwrap()); + let mut full_insee = String::from(l.get(0..2).unwrap()); full_insee.push_str(l.get(3..6).unwrap()); let rivoli_with_key = l.get(6..11).unwrap(); let libelle = l.get(15..41).unwrap(); - insert_statement.execute(rusqlite::params![&full_insee, rivoli_with_key, libelle]).unwrap(); - /* A priori on peut tout parser. - Il nous faut au moins: - - Libellé voie (index 15 a 41) - - code insee (index 3 a 5) - - code rivoli (??) - - */ + println!("{};{};{}", full_insee, rivoli_with_key, libelle); } }; Ok(())