Browse Source

CSV intermediate file approach.

time ./scripts/gen-fantoir.sh FANTOIR0721
real	0m13,319s
user	0m9,804s
sys	0m3,458s
fastestest
Félix Baylac-Jacqué 9 months ago
parent
commit
152d9d81b5
Signed by: NinjaTrappeur
GPG Key ID: EFD315F31848DBA4
  1. 140
      Cargo.lock
  2. 3
      Cargo.toml
  3. 30
      scripts/gen-fantoir.sh
  4. 29
      src/main.rs

140
Cargo.lock generated

@ -2,146 +2,6 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "fallible-iterator"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
[[package]]
name = "fallible-streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "fast-fantoir"
version = "0.1.0"
dependencies = [
"rusqlite",
]
[[package]]
name = "getrandom"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
dependencies = [
"ahash",
]
[[package]]
name = "hashlink"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf"
dependencies = [
"hashbrown",
]
[[package]]
name = "libc"
version = "0.2.107"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbe5e23404da5b4f555ef85ebed98fb4083e55a00c317800bc2a50ede9f3d219"
[[package]]
name = "libsqlite3-sys"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abd5850c449b40bacb498b2bbdfaff648b1b055630073ba8db499caf2d0ea9f2"
dependencies = [
"pkg-config",
"vcpkg",
]
[[package]]
name = "memchr"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "once_cell"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56"
[[package]]
name = "pkg-config"
version = "0.3.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12295df4f294471248581bc09bef3c38a5e46f1e36d6a37353621a0c6c357e1f"
[[package]]
name = "rusqlite"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a82b0b91fad72160c56bf8da7a549b25d7c31109f52cc1437eac4c0ad2550a7"
dependencies = [
"bitflags",
"fallible-iterator",
"fallible-streaming-iterator",
"hashlink",
"libsqlite3-sys",
"memchr",
"smallvec",
]
[[package]]
name = "smallvec"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "version_check"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"

3
Cargo.toml

@ -5,5 +5,4 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rusqlite = "*"
[dependencies]

30
scripts/gen-fantoir.sh

@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Builds ./fantoir.sqlite from a raw FANTOIR data file.
#
# Usage: gen-fantoir.sh path-to-fantoir-data
#
# Shell options:
#   -e           abort as soon as a command fails
#   -a           export every variable assigned below to child processes
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any of its stages fails
set -eau -o pipefail

# Exactly one argument (the FANTOIR data file) is required.
if [ "$#" -ne 1 ]; then
    # Diagnostics belong on stderr so they never end up in redirected output.
    {
        echo "Usage: gen-fantoir.sh path-to-fantoir-data"
        echo ""
        echo "ERROR: Missing fantoir data"
    } >&2
    exit 1
fi
# Scratch directory for the intermediate files; it is deleted again when the
# script exits, whether it succeeded or failed.
tmpDir="$(mktemp -d)"

# Removes the scratch directory and everything inside it.
clean_tmp() { rm -r "$tmpDir"; }

trap clean_tmp EXIT
# Intermediate files, both located in the scratch directory.
tmpCsv="${tmpDir}"/fantoir.csv
tmpSql="${tmpDir}"/import-fantoir.sql

echo "Generating fantoir CSV"
# The Rust program writes ';'-separated rows, preceded by a header line, to
# stdout; capture them in the intermediate CSV file.
cargo run --release -- "$1" > "${tmpCsv}"

echo "Generating fantoir SQLite DB"
# Write the sqlite3 import script. The heredoc delimiter is unquoted, so
# ${tmpCsv} is expanded by bash before sqlite3 reads the script.
#  - DROP TABLE makes the script re-runnable: when the table already exists,
#    .import appends to it and loads the header line as an ordinary row.
#  - The CSV path is single-quoted so a temp directory containing whitespace
#    is still passed to .import as a single argument.
cat >"${tmpSql}" <<EOF
DROP TABLE IF EXISTS fantoir;
.separator ";"
.import '${tmpCsv}' fantoir
EOF
sqlite3 fantoir.sqlite < "${tmpSql}"
echo "DB generated at $(pwd)/fantoir.sqlite"

29
src/main.rs

@ -1,30 +1,14 @@
use std::fs::File;
use std::io::{BufReader, BufRead};
use rusqlite;
fn main() -> std::io::Result<()> {
let fantoir_path = std::env::args().nth(1).unwrap();
let connection = rusqlite::Connection::open("fantoir.sqlite").unwrap();
connection.execute(
"
CREATE TABLE IF NOT EXISTS streets (
id INTEGER PRIMARY KEY,
insee TEXT NOT NULL,
rivoli TEXT NOT NULL,
libelle TEXT NOT NULL
);
", []
).unwrap();
let mut insert_statement = connection
.prepare("INSERT INTO streets (insee, rivoli, libelle) values (?,?,?)")
.unwrap();
let file = match File::open(&fantoir_path) {
Err(err) => panic!("Cannot read file {}: {}", fantoir_path, err),
Ok(file) => file,
};
let reader = BufReader::new(file);
let mut full_insee = String::with_capacity(5);
println!("full_insee;rivoli_with_key;libelle");
for line in reader.lines() {
let l = line.unwrap();
if l.chars().nth(3) == Some(' ') {
@ -32,18 +16,11 @@ fn main() -> std::io::Result<()> {
} else if l.chars().nth(6) == Some(' ') {
// Enregistrement Commune
} else {
full_insee = String::from(l.get(0..2).unwrap());
let mut full_insee = String::from(l.get(0..2).unwrap());
full_insee.push_str(l.get(3..6).unwrap());
let rivoli_with_key = l.get(6..11).unwrap();
let libelle = l.get(15..41).unwrap();
insert_statement.execute(rusqlite::params![&full_insee, rivoli_with_key, libelle]).unwrap();
/* A priori on peut tout parser.
Il nous faut au moins:
- Libellé voie (index 15 a 41)
- code insee (index 3 a 5)
- code rivoli (??)
*/
println!("{};{};{}", full_insee, rivoli_with_key, libelle);
}
};
Ok(())

Loading…
Cancel
Save