Compare commits
1 Commits
master
...
sqlite-nat
Author | SHA1 | Date |
---|---|---|
Félix Baylac-Jacqué | 7a43fdcdd6 |
|
@ -2,6 +2,146 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "fallible-iterator"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
|
||||
|
||||
[[package]]
|
||||
name = "fallible-streaming-iterator"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||
|
||||
[[package]]
|
||||
name = "fast-fantoir"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"rusqlite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashlink"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf"
|
||||
dependencies = [
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e74d72e0f9b65b5b4ca49a346af3976df0f9c61d550727f349ecd559f251a26c"
|
||||
|
||||
[[package]]
|
||||
name = "libsqlite3-sys"
|
||||
version = "0.23.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2cafc7c74096c336d9d27145f7ebd4f4b6f95ba16aa5a282387267e6925cb58"
|
||||
dependencies = [
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe"
|
||||
|
||||
[[package]]
|
||||
name = "rusqlite"
|
||||
version = "0.26.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ba4d3462c8b2e4d7f4fcfcf2b296dc6b65404fbbc7b63daa37fd485c149daf7"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"fallible-iterator",
|
||||
"fallible-streaming-iterator",
|
||||
"hashlink",
|
||||
"libsqlite3-sys",
|
||||
"memchr",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.10.2+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
|
||||
|
|
|
@ -5,4 +5,6 @@ edition = "2018"
|
|||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
[dependencies]
|
||||
# Tested with rusqlite 0.26.3
|
||||
rusqlite = "*"
|
|
@ -26,3 +26,7 @@ CREATE TABLE IF NOT EXISTS "fantoir"(
|
|||
"libelle" TEXT
|
||||
);
|
||||
```
|
||||
|
||||
## Data
|
||||
|
||||
You can download the latest FANTOIR data here: https://www.data.gouv.fr/fr/datasets/fichier-fantoir-des-voies-et-lieux-dits/
|
||||
|
|
|
@ -17,16 +17,14 @@ trap clean_tmp EXIT
|
|||
tmpCsv="${tmpDir}"/fantoir.csv
|
||||
tmpSql="${tmpDir}"/import-fantoir.sql
|
||||
|
||||
echo "[+] Generating fantoir CSV"
|
||||
echo "Generating fantoir CSV"
|
||||
cargo run --release -- "$1" > "${tmpCsv}"
|
||||
|
||||
echo "[+] Generating fantoir SQLite DB"
|
||||
echo "Generating fantoir SQLite DB"
|
||||
cat >"${tmpSql}" <<EOF
|
||||
.separator ";"
|
||||
.import ${tmpCsv} fantoir
|
||||
EOF
|
||||
sqlite3 fantoir.sqlite < "${tmpSql}"
|
||||
echo "[+] Populating DB index"
|
||||
echo "CREATE INDEX insee_fantoir ON fantoir(full_insee);" | sqlite3 fantoir.sqlite
|
||||
|
||||
echo "[+] DB generated at $(pwd)/fantoir.sqlite"
|
||||
echo "DB generated at $(pwd)/fantoir.sqlite"
|
||||
|
|
265
src/main.rs
265
src/main.rs
|
@ -1,14 +1,45 @@
|
|||
use std::fs::File;
|
||||
use std::io::{BufReader, BufRead};
|
||||
|
||||
fn main() -> std::io::Result<()> {
|
||||
let fantoir_path = std::env::args().nth(1).unwrap();
|
||||
let file = match File::open(&fantoir_path) {
|
||||
Err(err) => panic!("Cannot read file {}: {}", fantoir_path, err),
|
||||
Ok(file) => file,
|
||||
};
|
||||
let reader = BufReader::new(file);
|
||||
println!("full_insee;rivoli_with_key;libelle");
|
||||
use rusqlite::{Connection, Result, Statement, params};
|
||||
|
||||
/// Create a new SQLite DB and sets the appropriate pragmas.
|
||||
///
|
||||
/// This is a one time batch import. We don't care about atomicity
|
||||
/// here. If the batch fails, then we'll start it over from scratch.
|
||||
/// Disabling the rollback journal speeds up quite a lot the inserts.
|
||||
///
|
||||
///
|
||||
fn conn_db (path: &str) -> Result<Connection> {
|
||||
let db = Connection::open(path)?;
|
||||
db.pragma_update(None,"journal_mode","OFF")?;
|
||||
db.pragma_update(None,"synchronous","OFF")?;
|
||||
db.execute(
|
||||
"CREATE TABLE fantoir (
|
||||
id INTEGER PRIMARY KEY,
|
||||
rivoli TEXT NOT NULL,
|
||||
insee TEXT NOT NULL,
|
||||
libelle TEXT NOT NULL
|
||||
)", []
|
||||
)?;
|
||||
Ok(db)
|
||||
}
|
||||
|
||||
/// Parsing a FANTOIR file. This is where all the business logic lives.
|
||||
///
|
||||
/// # Parsing logic
|
||||
///
|
||||
/// For each line, we try to figure out what kind of record type we're
|
||||
/// looking at. We're using some of the FANTOIR specificities to do so.
|
||||
///
|
||||
/// 1. If the 3rd char is empty, we can assume it's a "Département"
|
||||
/// record.
|
||||
/// 2. If the 3rd & 6th char are empty, it's a "Commune".
|
||||
/// 3. All the other records will be street records. In that case, we
|
||||
/// can extract the insee code, the rivoli code (with the key) and
|
||||
/// the street name (aka libelle).
|
||||
#[inline]
|
||||
fn parse_fantoir_lines(reader: BufReader<File>, mut stmt: Statement) -> () {
|
||||
for line in reader.lines() {
|
||||
let l = line.unwrap();
|
||||
if l.chars().nth(3) == Some(' ') {
|
||||
|
@ -19,207 +50,27 @@ fn main() -> std::io::Result<()> {
|
|||
let mut full_insee = String::from(l.get(0..2).unwrap());
|
||||
full_insee.push_str(l.get(3..6).unwrap());
|
||||
let rivoli_with_key = l.get(6..11).unwrap();
|
||||
let street_type = street_type(l.get(11..15).unwrap());
|
||||
let libelle = l.get(15..41).unwrap();
|
||||
println!("{};{};{}{}", full_insee, rivoli_with_key, street_type, libelle);
|
||||
let libelle = l.get(15..41).unwrap().trim();
|
||||
stmt.execute(params! [rivoli_with_key, full_insee, libelle]).unwrap();
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn street_type(street_type: &str) -> &str {
|
||||
match street_type {
|
||||
"ACH " => "ANCIEN CHEMIN ",
|
||||
"AER " => "AERODROME ",
|
||||
"AERG" => "AEROGARE ",
|
||||
"AGL " => "AGGLOMERATION ",
|
||||
"AIRE" => "AIRE ",
|
||||
"ALL " => "ALLEE ",
|
||||
"ANGL" => "ANGLE ",
|
||||
"ARC " => "ARCADE ",
|
||||
"ART " => "ANCIENNE ROUTE ",
|
||||
"AUT " => "AUTOROUTE ",
|
||||
"AV " => "AVENUE ",
|
||||
"BASE" => "BASE ",
|
||||
"BD " => "BOULEVARD ",
|
||||
"BER " => "BERGE ",
|
||||
"BORD" => "BORD ",
|
||||
"BRE " => "BARRIERE ",
|
||||
"BRG " => "BOURG ",
|
||||
"BRTL" => "BRETELLE ",
|
||||
"BSN " => "BASSIN ",
|
||||
"CAE " => "CARRIERA ",
|
||||
"CALL" => "CALLE, CALLADA ",
|
||||
"CAMI" => "CAMIN ",
|
||||
"CAMP" => "CAMP ",
|
||||
"CAN " => "CANAL ",
|
||||
"CAR " => "CARREFOUR ",
|
||||
"CARE" => "CARRIERE ",
|
||||
"CASR" => "CASERNE ",
|
||||
"CC " => "CHEMIN COMMUNAL ",
|
||||
"CD " => "CHEMIN DEPARTEMENTAL ",
|
||||
"CF " => "CHEMIN FORESTIER ",
|
||||
"CHA " => "CHASSE ",
|
||||
"CHE " => "CHEMIN ",
|
||||
"CHEM" => "CHEMINEMENT ",
|
||||
"CHL " => "CHALET ",
|
||||
"CHP " => "CHAMP ",
|
||||
"CHS " => "CHAUSSEE ",
|
||||
"CHT " => "CHATEAU ",
|
||||
"CHV " => "CHEMIN VICINAL ",
|
||||
"CITE" => "CITE ",
|
||||
"CIVE" => "COURSIVE ",
|
||||
"CLOS" => "CLOS ",
|
||||
"CLR " => "COULOIR ",
|
||||
"COIN" => "COIN ",
|
||||
"COL " => "COL ",
|
||||
"COR " => "CORNICHE ",
|
||||
"CORO" => "CORON ",
|
||||
"COTE" => "COTE ",
|
||||
"COUR" => "COUR ",
|
||||
"CPG " => "CAMPING ",
|
||||
"CR " => "CHEMIN RURAL ",
|
||||
"CRS " => "COURS ",
|
||||
"CRX " => "CROIX ",
|
||||
"CTR " => "CONTOUR ",
|
||||
"CTRE" => "CENTRE ",
|
||||
"DARS" => "DARSE, DARCE ",
|
||||
"DEVI" => "DEVIATION ",
|
||||
"DIG " => "DIGUE ",
|
||||
"DOM " => "DOMAINE ",
|
||||
"DRA " => "DRAILLE ",
|
||||
"DSC " => "DESCENTE ",
|
||||
"ECA " => "ECART ",
|
||||
"ECL " => "ECLUSE ",
|
||||
"EMBR" => "EMBRANCHEMENT ",
|
||||
"EMP " => "EMPLACEMENT ",
|
||||
"ENC " => "ENCLOS ",
|
||||
"ENV " => "ENCLAVE ",
|
||||
"ESC " => "ESCALIER ",
|
||||
"ESP " => "ESPLANADE ",
|
||||
"ESPA" => "ESPACE ",
|
||||
"ETNG" => "ETANG ",
|
||||
"FD " => "FOND ",
|
||||
"FG " => "FAUBOURG ",
|
||||
"FON " => "FONTAINE ",
|
||||
"FOR " => "FORET ",
|
||||
"FORT" => "FORT ",
|
||||
"FOS " => "FOSSE ",
|
||||
"FRM " => "FERME ",
|
||||
"GAL " => "GALERIE ",
|
||||
"GARE" => "GARE ",
|
||||
"GBD " => "GRAND BOULEVARD ",
|
||||
"GPL " => "GRANDE PLACE ",
|
||||
"GR " => "GRANDE RUE ",
|
||||
"GREV" => "GREVE ",
|
||||
"HAB " => "HABITATION ",
|
||||
"HAM " => "HAMEAU ",
|
||||
"HIP " => "HIPPODROME ",
|
||||
"HLE " => "HALLE ",
|
||||
"HLG " => "HALAGE ",
|
||||
"HLM " => "HLM ",
|
||||
"HTR " => "HAUTEUR ",
|
||||
"ILE " => "ILE ",
|
||||
"ILOT" => "ILOT ",
|
||||
"IMP " => "IMPASSE ",
|
||||
"JARD" => "JARDIN ",
|
||||
"JTE " => "JETEE ",
|
||||
"LAC " => "LAC ",
|
||||
"LEVE" => "LEVEE ",
|
||||
"LICE" => "LICES ",
|
||||
"LIGN" => "LIGNE ",
|
||||
"LOT " => "LOTISSEMENT ",
|
||||
"MAIL" => "MAIL ",
|
||||
"MAIS" => "MAISON ",
|
||||
"MAR " => "MARCHE ",
|
||||
"MARE" => "MARE ",
|
||||
"MAS " => "MAS ",
|
||||
"MNE " => "MORNE ",
|
||||
"MRN " => "MARINA ",
|
||||
"MTE " => "MONTEE ",
|
||||
"NTE " => "NOUVELLE ROUTE ",
|
||||
"PAE " => "PETITE AVENUE ",
|
||||
"PARC" => "PARC ",
|
||||
"PAS " => "PASSAGE ",
|
||||
"PASS" => "PASSE ",
|
||||
"PCH " => "PETIT CHEMIN ",
|
||||
"PCHE" => "PORCHE ",
|
||||
"PHAR" => "PHARE ",
|
||||
"PIST" => "PISTE ",
|
||||
"PKG " => "PARKING ",
|
||||
"PL " => "PLACE ",
|
||||
"PLA " => "PLACA ",
|
||||
"PLAG" => "PLAGE ",
|
||||
"PLAN" => "PLAN ",
|
||||
"PLCI" => "PLACIS ",
|
||||
"PLE " => "PASSERELLE ",
|
||||
"PLN " => "PLAINE ",
|
||||
"PLT " => "PLATEAU ",
|
||||
"PNT " => "POINTE ",
|
||||
"PONT" => "PONT ",
|
||||
"PORQ" => "PORTIQUE ",
|
||||
"PORT" => "PORT ",
|
||||
"POST" => "POSTE ",
|
||||
"POT " => "POTERNE ",
|
||||
"PROM" => "PROMENADE ",
|
||||
"PRT " => "PETITE, ROUTE ",
|
||||
"PRV " => "PARVIS ",
|
||||
"PTA " => "PETITE ALLEE ",
|
||||
"PTE " => "PORTE ",
|
||||
"PTR " => "PETITE RUE ",
|
||||
"PTTE" => "PLACETTE ",
|
||||
"QUA " => "QUARTIER ",
|
||||
"QUAI" => "QUAI ",
|
||||
"RAC " => "RACCOURCI ",
|
||||
"REM " => "REMPART ",
|
||||
"RES " => "RESIDENCE ",
|
||||
"RIVE" => "RIVE ",
|
||||
"RLE " => "RUELLE ",
|
||||
"ROC " => "ROCADE ",
|
||||
"RPE " => "RAMPE ",
|
||||
"RPT " => "ROND-POINT ",
|
||||
"RTD " => "ROTONDE ",
|
||||
"RTE " => "ROUTE ",
|
||||
"RUE " => "RUE ",
|
||||
"RUET" => "RUETTE ",
|
||||
"RUIS" => "RUISSEAU ",
|
||||
"RULT" => "RUELLETTE ",
|
||||
"RVE " => "RAVINE ",
|
||||
"SAS " => "SAS ",
|
||||
"SEN " => "SENTIER, SENTE ",
|
||||
"SQ " => "SQUARE ",
|
||||
"STDE" => "STADE ",
|
||||
"TER " => "TERRE ",
|
||||
"TOUR" => "TOUR ",
|
||||
"TPL " => "TERRE-PLEIN ",
|
||||
"TRA " => "TRAVERSE ",
|
||||
"TRAB" => "TRABOULE ",
|
||||
"TRN " => "TERRAIN ",
|
||||
"TRT " => "TERTRE ",
|
||||
"TSSE" => "TERRASSE ",
|
||||
"TUN " => "TUNNEL ",
|
||||
"VAL " => "VAL ",
|
||||
"VALL" => "VALLON, VALLEE ",
|
||||
"VC " => "VOIE COMMUNALE ",
|
||||
"VCHE" => "VIEUX CHEMIN ",
|
||||
"VEN " => "VENELLE ",
|
||||
"VGE " => "VILLAGE ",
|
||||
"VIA " => "VIA ",
|
||||
"VIAD" => "VIADUC ",
|
||||
"VIL " => "VILLE ",
|
||||
"VLA " => "VILLA ",
|
||||
"VOIE" => "VOIE ",
|
||||
"VOIR" => "VOIRIE ",
|
||||
"VOUT" => "VOUTE ",
|
||||
"VOY " => "VOYEUL ",
|
||||
"VTE " => "VIEILLE ROUTE ",
|
||||
"ZA " => "ZA ",
|
||||
"ZAC " => "ZAC ",
|
||||
"ZAD " => "ZAD ",
|
||||
"ZI " => "ZI ",
|
||||
"ZONE" => "ZONE ",
|
||||
"ZUP " => "ZUP ",
|
||||
_ => ""
|
||||
}
|
||||
fn main() -> std::io::Result<()> {
|
||||
let db = match conn_db("./fantoir.sqlite") {
|
||||
Err(err) => panic!("Cannot close DB: {}", err),
|
||||
Ok(conn) => conn
|
||||
};
|
||||
let stmt = db.prepare(
|
||||
"INSERT INTO fantoir (rivoli, insee, libelle) VALUES (?1, ?2, ?3)"
|
||||
).unwrap();
|
||||
let fantoir_path = std::env::args().nth(1).unwrap();
|
||||
let file = match File::open(&fantoir_path) {
|
||||
Err(err) => panic!("Cannot read file {}: {}", fantoir_path, err),
|
||||
Ok(file) => file,
|
||||
};
|
||||
let reader = BufReader::new(file);
|
||||
parse_fantoir_lines(reader, stmt);
|
||||
db.close();
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue