Arkheia_Spreadsheet_Importer/butcher-xml

22 lines
822 B
Bash
Executable File

#!/usr/bin/env bash
#
# butcher-xml: turn a well-formed XML export into the pseudo-XML format that
# Arkheia ingests.
#
# Usage: butcher-xml INPUT.xml OUTPUT
#   INPUT.xml  XML document made of a <root> node wrapping <entry> nodes.
#   OUTPUT     file to (over)write with the converted records.
#
# Exits non-zero if arguments are missing, xmllint is unavailable, or any
# stage of the conversion pipeline fails.
set -euo pipefail

if (( $# < 2 )); then
  printf 'Usage: %s INPUT.xml OUTPUT\n' "${0##*/}" >&2
  exit 2
fi

# Check for the tool before touching the output file.
if ! command -v xmllint > /dev/null; then
  printf '%s: xmllint not found in PATH\n' "${0##*/}" >&2
  exit 1
fi

readonly input=$1
readonly output=$2

# Arkheia expects a BOM at the front of the UTF-8 file, i.e. exactly what the
# Unicode spec recommends NOT doing. Without the BOM the file is treated as
# ASCII and accented characters get mangled.
#
# Arkheia also does not expect valid XML but a rather odd format instead: a
# run of XML elements for each entry, with entries separated by a newline.
# Butchering the XML file into something Arkheia will ingest means:
#   - removing the XML declaration;
#   - removing the <root> node;
#   - removing the <entry> nodes;
#   - separating the entries by a newline.
{
  # UTF-8 byte order mark.
  printf '\xEF\xBB\xBF'

  # 1. xmllint --format puts each tag on its own indented line.
  # 2. awk drops the declaration and the <root>/<entry> wrapper tags. The
  #    patterns are anchored on the tag itself so that field values (or other
  #    tag names) that merely contain "root", "entry" or "xml" are preserved.
  #    Reassigning $1 re-joins the fields with single spaces, which strips the
  #    indentation (and collapses runs of whitespace inside a line).
  # 3. tr glues everything onto one line.
  # 4. sed starts a new line before every <numseque>, the first element of a
  #    record ("\n" in the replacement requires GNU sed).
  # 5. tail drops the first line, i.e. whatever preceded the first <numseque>
  #    (normally nothing).
  xmllint --format "$input" \
    | awk '
        /^[[:space:]]*<\?xml/ { next }
        /^[[:space:]]*<\/?(root|entry)([[:space:]>]|\/>|$)/ { next }
        { $1 = $1; print }
      ' \
    | tr -d '\n' \
    | sed 's/<numseque>/\n<numseque>/g' \
    | tail -n +2
} > "$output"