commit 8dd5cb8dbb1da23841059b86a008283209e1ad12
Author: Félix Baylac-Jacqué <felix@alternativebit.fr>
Date:   Sun Jan 17 16:34:53 2021 +0100

    Initial spreadsheet importer for Arkhéia
    
    Pretty crude implementation of a Libre Office Spreadsheet => Arkhéia
    DB.
    
    The Arkhéia format is totally bonkers. This implementation has been
    tested with a pretty small sample file. While it does seem to work,
    I'm still not 100% sure this will scale correctly to a larger import
    sample.
    
    Let's hope for the best and fix stuff along the way :)

diff --git a/butcher-xml b/butcher-xml
new file mode 100755
index 0000000..e5080e0
--- /dev/null
+++ b/butcher-xml
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+if [[ $# -lt 2 ]]; then
+    echo "usage: butcher-xml XML_FILE OUTPUT_FILE" >&2
+    exit 1
+fi
+
+# Arkeia expects a BOM at the front of the UTF-8 file, AKA exactly what
+# the Unicode spec tells you NOT TO DO... (W-T-F!!!!)
+
+# If you omit the BOM, the file will be treated as ASCII, screwing up
+# your accents...
+
+# Add BOM
+printf '\xEF\xBB\xBF' > "$2"
+
+# Ah, yeah. Arkeia is also not expecting to get valid XML but a
+# *really* weird format instead. Basically it expects a set of XML
+# elements for each entry, the entries being separated by a newline.
+
+# Butchering the XML file into something Arkeia will ingest...
+# In no particular order:
+# - Removing the XML declaration.
+# - Removing <root> node.
+# - Removing <entry> nodes.
+# - Separating the entries by a newline (each entry is assumed to start
+#   with a <numseque> element).
+#
+# The delete patterns only match lines holding nothing but the tag, so a
+# data line whose text merely contains "root", "entry" or "xml" is kept.
+xmllint --format "$1" \
+    | sed -E '/^<\?xml /d; /^[[:space:]]*<\/?(root|entry)\/?>[[:space:]]*$/d' \
+    | awk '{$1=$1};1' \
+    | tr -d '\n' \
+    | sed 's/<numseque>/\n<numseque>/g' \
+    | tail -n +2 >> "$2"
diff --git a/import-spreadsheet b/import-spreadsheet
new file mode 100755
index 0000000..1ab6d66
--- /dev/null
+++ b/import-spreadsheet
@@ -0,0 +1,19 @@
+#!/usr/bin/env nix-shell
+#!nix-shell -i bash -p libxml2
+
+# Convert a spreadsheet into an Arkeia import file: import.py turns the
+# spreadsheet into XML, then butcher-xml rewrites it for Arkeia.
+set -euo pipefail
+
+if [[ -z ${1:-} || -z ${2:-} ]]; then
+    echo "usage: import-spreadsheet SPREADSHEET OUTPUT_FILE" >&2
+    exit 1
+fi
+
+# The intermediate XML is removed on exit, whether or not the import
+# succeeded.
+tmpFile=$(mktemp)
+trap 'rm -f "$tmpFile"' EXIT
+
+./bin/python import.py "$1" "$tmpFile"
+./butcher-xml "$tmpFile" "$2"
diff --git a/import.py b/import.py
new file mode 100644
index 0000000..881ce8e
--- /dev/null
+++ b/import.py
@@ -0,0 +1,47 @@
+"""Convert the ``Sheet1`` sheet of an ODS spreadsheet into an XML file.
+
+The first row of the sheet provides the element names; every following
+row becomes one ``<entry>`` element under a single ``<root>`` element.
+"""
+import sys
+import xml.etree.ElementTree as ET
+
+from pyexcel_ods import get_data
+
+
+def process_line(field_names, line, root):
+    """Append one spreadsheet row to ``root`` as an ``<entry>`` element.
+
+    One child element is created per name in ``field_names``; its text is
+    ``str()`` of the cell at the same position in ``line``.  A row shorter
+    than the header gets empty text for the missing cells, and cells
+    beyond the header are ignored.  An empty row adds nothing.
+    """
+    if not line:
+        return
+    entry = ET.SubElement(root, "entry")
+    for index, name in enumerate(field_names):
+        value = line[index] if index < len(line) else ""
+        ET.SubElement(entry, name).text = str(value)
+
+
+def main(argv):
+    """Read the spreadsheet ``argv[1]`` and write the XML to ``argv[2]``."""
+    if len(argv) < 3:
+        sys.exit("usage: import.py SPREADSHEET OUTPUT_FILE")
+    spreadsheet_path, out_path = argv[1], argv[2]
+    table = get_data(spreadsheet_path)['Sheet1']
+    if not table:
+        sys.exit("error: 'Sheet1' is empty, expected a header row")
+    header, *rows = table
+    root = ET.Element("root")
+    for row in rows:
+        process_line(header, row, root)
+    # Ask for the XML declaration explicitly so the encoding is always
+    # declared in the output instead of depending on how it is spelled.
+    ET.ElementTree(root).write(
+        out_path, encoding="utf-8", xml_declaration=True)
+
+
+if __name__ == '__main__':
+    main(sys.argv)