User talk:A.Plank/Permit and Loan Terms (Handbook)
From GGBN Wiki
Export the sheets as HTML from Google, convert them to MediaWiki text format with Tidy and Pandoc, and then apply the following modifications (the script is still in development; there is no guarantee of a perfect conversion)
convert2mediawiki-mwt.sh
#!/bin/bash
#
# convert2mediawiki-mwt.sh
#
# Converts the HTML files found in the current directory to MediaWiki text
# and post-processes the result. File names produced along the way:
#   NAME.html → NAME.tidy.html (+ NAME.tidy.log) → NAME.tidy.html.mwt
#   ….mwt.modified    # default modifications for all
#   ….mwt.modified2   # particular modifications
#   ….mwt.modified3   # modifications by hand (never written by this script)
# # # # # # # # # # # # #
# dependency tidy
# dependency pandoc
# dependency sed
#
# An earlier variant fed the raw HTML straight to pandoc
#   pandoc -f html -t mediawiki "$f" > "${f}.mwt"
# but it is better to run tidy first and convert the tidied file.

#######################################
# Run tidy and then pandoc over every "NAME.html" in the current directory
# (files already named "NAME.tidy.html" are skipped).
# Globals:   none
# Arguments: none
# Outputs:   one progress line per file on stdout, warnings on stderr;
#            writes NAME.tidy.html, NAME.tidy.log and NAME.tidy.html.mwt
# Returns:   0
#######################################
convert_html_exports() {
  local src tidy_html tidy_log mwt tidy_status

  # NUL-delimited list on fd 3: file names containing blanks, parentheses or
  # glob characters survive unchanged, and tidy/pandoc keep their own stdin.
  while IFS= read -r -d '' src <&3; do
    tidy_html="${src%.*}.tidy.html"
    tidy_log="${src%.*}.tidy.log"
    mwt="${tidy_html}.mwt"
    printf '# convert file to %s … \e[34m%s\e[0m \n' "${tidy_html}" "${mwt}"

    # tidy -export-default-config > config_of_tidy.txt
    tidy_status=0
    tidy -config config_of_tidy.txt "${src}" > "${tidy_html}" 2> "${tidy_log}" \
      || tidy_status=$?
    # tidy exits with 1 for mere warnings; anything above means no usable
    # output was produced, so converting it would only yield an empty page.
    if (( tidy_status > 1 )); then
      printf 'warning: tidy failed on %s (exit status %d, see %s); not converted\n' \
        "${src}" "${tidy_status}" "${tidy_log}" >&2
      continue
    fi

    pandoc -f html -t mediawiki "${tidy_html}" > "${mwt}" \
      || printf 'warning: pandoc failed on %s\n' "${tidy_html}" >&2
  done 3< <(find . -maxdepth 1 -iregex '\.[^.]+.html$' -and -not -iregex '\.[^.]+.tidy.html$' -print0 | sort -z)
  return 0
}

# Newline-only word splitting stays in effect for the rest of the script,
# which resets it with "unset IFS" at its very end.
IFS=$'\n';
convert_html_exports
#######################################
# Print the wiki section title that belongs to a "NAME.tidy.html.mwt" path.
# Arguments: $1 - path as printed by find, e.g. "./ReadMe.tidy.html.mwt"
# Outputs:   the title on stdout (the file stem, with a few known names
#            rewritten, e.g. "ReadMe" becomes "Introduction")
#######################################
mwt_section_title() {
  printf '%s\n' "$1" | sed --regexp-extended '
s@.tidy.html.mwt$@@; s@^./@@g;
s@.*(Document Category \(for discussion\)).*@\1@I;
s@Document Types w o IPR \(draft\)@Document Types w/o IPR (draft)@I;
s@intellectual Property Document Types \(draft\)@Intellectual Property Document Types (draft)@I;
s@ReadMe@Introduction@I;
s@\((AU specific permits)\)@\1@I;
'
}

#######################################
# Print the HTML anchor (<span id="…"></span>) that is appended to the
# section heading of a "NAME.tidy.html.mwt" path.
# Arguments: $1 - path as printed by find
# Outputs:   the anchor on stdout; nothing for "ReadMe" and for names that
#            no rule knows about
#######################################
mwt_section_anchor() {
  local anchor
  anchor=$(printf '%s\n' "$1" | sed --regexp-extended '
s@.tidy.html.mwt$@@; s@^./@@g;
s@.*(AU specific permits).*@<span id="AU_specific_permits"></span>@I;
s@.*(EU specific permits).*@<span id="EU_specific_permits"></span>@I;
s@.*(US specific permits).*@<span id="US_specific_permits"></span>@I;
s@.*(Document Category \(for discussion\)).*@<span id="Document_Category_-_for_discussion"></span>@I;
s@.*(Document Category - final).*@<span id="Document_Category"></span>@I;
s@.*(Document Types w o IPR \(draft\)).*@<span id="Document_Types_w_o_IPR"></span>@I;
s@.*(intellectual Property Document Types \(draft\)).*@<span id="Intellectual_Property_Document_Types_-_draft"></span>@I;
s@.*(MTA examples).*@<span id="MTA_examples"></span>@I;
s@.*(Permit Types \(GGBN\)).*@<span id="Permit_Types_GGBN"></span>@I;
s@.*(Typology of Contents \(draft\)).*@<span id="Typology_of_Contents"></span>@I;
s@.*ReadMe.*@@I;
# s@.*().*@<span id=""></span>@I;
')
  # When no rule matched, sed hands back the bare file stem; appending that to
  # the heading would print the title twice, so only real anchors are emitted.
  if [[ "${anchor}" == '<span id="'* ]]; then
    printf '%s\n' "${anchor}"
  fi
}

#######################################
# Post-process one pandoc output file.
#   $1.modified   default modifications for all sheets
#   $1.modified2  particular modifications, only for the sections listed in
#                 the case statement below
# Line breaks inside table cells are temporarily replaced by the marker
# νεωλινε so that a cell can be handled as one line; the marker is turned
# back into a newline at the very end.
# Arguments: $1 - path of a "NAME.tidy.html.mwt" file
# Outputs:   progress lines on stdout; writes the files named above
#######################################
modify_mwt_file() {
  local src="$1"
  local out="${src}.modified"   # default modifications for all
  local out2="${src}.modified2" # particular modifications
  # "${src}.modified3" is reserved for modifications by hand
  local section anchor tag_rules color_rules cleaned i

  section=$(mwt_section_title "${src}")
  anchor=$(mwt_section_anchor "${src}")

  printf '# modify file to \e[34m%s\e[0m …\n' "${out}"
  sed --regexp-extended --null-data '
s@\|\n<div class="[^ ]+"[^>]+>\n+([^<>\n]*?)\n+</div>@| \1@g; # table cells containers: removed
s@^<div class="ritz grid-container" dir="ltr">\n(.+)\n+</div>\n*$@\1@g; # wrapping the whole sheet as div-container probably: removed
' "${src}" > "${out}"

  # This sed program is double-quoted because the heading line interpolates
  # shell variables; every literal double quote inside it must be written \".
  sed --regexp-extended --null-data --in-place "1 i\== ${section}${anchor} ==\n\n
# make all td to one line only, mark newline by νεωλινε
/\n\|[[:blank:]][^|\n]+/{ # td with immediate following content
:trow2one_line # replace \n to greek letters of newline (as a marker)
s@(\n\|[[:blank:]][^|\n]+)\n([^\n|]+)@\1νεωλινε\2@;
ttrow2one_line;
}
/\n\|\n<div[^|\n]+/{ # td with immediate \n and *then* following content, e.g. <div> or anything
:trow_seems_formatted2one_line # replace \n to greek letters of newline (as a marker)
s@(\n\|\n<div[^|\n]+)\n([^\n|]+)@\1νεωλινε\2@;
s@(\n\|\n<div[^|\n]+)\n\n([^\n|]+)@\1νεωλινενεωλινε\2@;
# three newlines become exactly three markers
s@(\n\|\n<div[^|\n]+)\n\n\n([^\n|]+)@\1νεωλινενεωλινενεωλινε\2@;
ttrow_seems_formatted2one_line;
}
/\n\|\n<div[^|\n]+/{ # formatted td preprocessed td should be almost in one line
s@(\n\|)\n(<div[^|\n]+)@\1νεωλινε\2@g;
}
# s@(\n\|[^\n|]+)(<br */>\n){1,}@\1\n@g; # remove trailing <br/> (and misplaced at the line end and cell end)
# s@(<br */>)\n@\1νεωλινε@g;
s@(\.\.\.)@…@g;
# NOTE(review): the next rule is a no-op as written (an escaped ampersand in the replacement is a literal ampersand); confirm what was intended
s@ & @ \& @g;
# convert general formats
s@(<span style=\")font-weight:bold;(\">)([^<>]+)(</span>)@<strong>\3</strong>@g
s@(<span style=\")font-weight:bold;([^\"]+\">)([^<>]+)(</span>)@<strong>\1\2\3\4</strong>@g
s@(<span style=\")text-decoration:line-through;(\">)([^<>]+)(</span>)@<s>\3</s>@g
s@(<span style=\")text-decoration:line-through;([^\"]+\">)([^<>]+)(</span>)@<s>\1\2\3\4</s>@g
# underline is rendered with <u>; remaining styles stay on the span like for bold, strike and italic
s@(<span style=\")text-decoration:underline;(\">)([^<>]+)(</span>)@<u>\3</u>@g
s@(<span style=\")text-decoration:underline;([^\"]+\">)([^<>]+)(</span>)@<u>\1\2\3\4</u>@g
s@(<span style=\")font-style:italic;([^\"]+\">)([^<>]+)(</span>)@<i>\1\2\3\4</i>@g
s@(<span style=\")font-style:italic;(\">)([^<>]+)(</span>)@<i>\3</i>@g
## # idea: (1) make only in-line wiki table rows (2) remove redundant rows (3) return to right Wiki table syntax
## # wiki-tabledata inline formatting: removed
## /<div class=\"softmerge-inner\"[^>]+>/,/<\/div>/ {
## s@\|\n<div class=\"softmerge-inner\"[^>]+>[\n]{2}([^\n]+)[\n]{2}</div>@| \1@g; # single-line contents
## # s@\|\n<div class=\"softmerge-inner\"[^>]+>[\n]{2}(.+)[\n]{2}</div>@| \1@g; # problem: multi-line contents
## }
/νεωλινε<div class=\"[^<>\n|]+>[^\n]+<\/div>/ {
s@νεωλινε<div class=\"[^<>\n|]+>@@g;
s@νεωλινε</div>@@g;
} ## # wiki-tabledata inline formatting: removed
##
##
## #
## # s@\|-\n\|[[:blank:]]+[[:digit:]]+[[:blank:]]*(\n\|){1,}(\n\|-)@\2@g; # approach in original formatting: not yet working to remove perfectly redundant table rows
## #
##
## # marking Wiki-table-syntax and form only single rows of table data (no line break)
## s@\n(\|-)@\n<wiki_trow>\1@g;
## s@\n(\|\})@\n<wiki_endtable>\1@g;
## s@\n(\|)@\n<wiki_tdata>\1@g;
## s@\n(<wiki_tdata>)@ \1@g;
## #
## # make table data as Wiki inline syntax from \n|… \n|… into || … || …
## ## s@([[:blank:]]<wiki_tdata>\|)[[:blank:]]@\1| @g;
## s@(<wiki_tdata>\|)[[:blank:]]@\1| @g;
## s@([[:blank:]]<wiki_tdata>\|)[[:blank:]]*[\n]@\1|\n@g; # fix line end table data
## s@(\n<wiki_trow>\|-)([[:blank:]]<wiki_tdata>\|)\|@\1\2@g; # fix first table data: only one |
## #
## # should be all in-line: tr + td + td … now
## s@<wiki_trow>\|- <wiki_tdata>\| +[[:digit:]]+( +<wiki_tdata>\|\|){1,}\n@@g; # remove redundant “empty” table rows
## s@<wiki_trow>\|- <wiki_tdata>\|( +<wiki_tdata>\|\|){1,}\n@@g; # remove redundant “empty” table rows
## #
## # return to Wiki table syntax
## s@[[:blank:]]<wiki_tdata>(\|[[:blank:]])@\n\1@g;
## s@[[:blank:]]<wiki_tdata>(\|)\|@\n\1@g;
## s@<wiki_trow>(\|-)@\1@g;
## s@\n<wiki_endtable>(\|\})@\n\1@g;
##
/\"/,/\"/ { s@\"([^\n&]+)\"@“\1”@g; } # replace quotes to typographical ones: “…”
# final formattings
s@\{\|\n@{| class=\"vertical-align-top booktabledotted\"\n@g;
# remove all A, B, C headings
" "${out}"

  # NOTE(review): "AU specific permits" has an anchor rule but no entry in
  # this case statement, so it only receives the default modifications —
  # confirm whether that is intended.
  case "${section}" in
    ReadMe|Introduction)
      printf '# further modifications → \e[34m%s\e[0m (section %s) …\n' "${out2}" "${section}"
      sed --regexp-extended --null-data '
s@\n(\|-\n)(\|[[:blank:]\n])@\n\1<wiki_tdata1>\2@g;
s@(<wiki_tdata1>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata2>\2@g;
s@(<wiki_tdata2>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata3>\2@g;
s@(<wiki_tdata3>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata4>\2@g;
s@(<wiki_tdata4>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata5>\2@g;
s@\n(<wiki_tdata[[:digit:]]>\|)([[:blank:]\n])@ \1\2@g;
# remove index column
s@(\n|- )<wiki_tdata1>\| [[:digit:]]+ (<wiki_tdata2>)@\1\2@g;
# remove last empty column
s@<wiki_tdata3>\|([[:blank:]]*)\n@\1\n@g;
s@ <wiki_tdata3>\|[[:blank:]]+([^[:blank:]])@: \1@g;
s@\n\|- <wiki_tdata2>\|[[:blank:]]+([^[:blank:]])@\n\1@g;
# NOTE(review): this also rewrites the quotes of the class attribute added to the table opening by the default modifications - confirm
/\"/,/\"/ { s@"([^\n&]+)"@“\1”@g; }
' "${out}" > "${out2}"
      ;;
    "Document Category - final"\
    |"Document Category (for discussion)"\
    |"Document Types w/o IPR (draft)"\
    |"EU specific permits"\
    |"Intellectual Property Document Types (draft)"\
    |"MTA examples"|"Permit Types (GGBN)"|"Typology of Contents (draft)"|"US specific permits")
      printf '# further modifications → \e[34m%s\e[0m (section %s) …\n' "${out2}" "${section}"

      # Number the cells of a row: the line after <wiki_tdataN> gets
      # <wiki_tdataN+1>, for N = 1 … 41 (one sed rule per N, in that order).
      tag_rules=''
      for ((i = 1; i <= 41; i++)); do
        tag_rules+='s@(<wiki_tdata'"${i}"'>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata'"$((i + 1))"'>\2@g;'$'\n'
      done

      sed --regexp-extended --null-data \
        -e '
s@\n(\|-\n)(\|[[:blank:]\n])@\n\1<wiki_tdata1>\2@g;
# /\n\|[[:blank:]][^|\n]+/{
# :trow2oneline s@(\n\|[[:blank:]][^|\n]+)\n([^\n|])@\1νεωλινε\2@; # replace \n to greek letters of newline (as a marker)
# ttrow2oneline;
# }
# s@(<br */>)\n@\1@g; # try to turn table data into one-liners
# # only works if trow is a one-liner!
' \
        -e "${tag_rules}" \
        -e '
/\n(<wiki_tdata[[:digit:]]+>\|)([[:blank:]\n]|νεωλινε)/{
:wiki_tdata2one_line;
s@\n(<wiki_tdata[[:digit:]]+>\|)([[:blank:]\n]|νεωλινε)@ \1\2@g; # convert tabledata to one row
twiki_tdata2one_line;
}
## remove first column, the index column
s@(\n|- )<wiki_tdata1>\| [[:digit:]]+ (<wiki_tdata2>)@\1\2@g;
s@(\n|- )<wiki_tdata1>\| (<wiki_tdata2>)@\1\2@g;
## (wiki_tdata2: must be the 1st | Wiki column not || attention!)
s@(<wiki_tdata[3-9]>\|)([[:blank:]]+|νεωλινε)@\1|\2@g; # add || for 2-9 tabledata
s@(<wiki_tdata[[:digit:]]{2,}>\|)([[:blank:]]+|νεωλινε)@\1|\2@g; # add || for 10-99 tabledata
s@(<wiki_tdata[[:digit:]]{2,}>\|)(\n)@\1|\2@g; # add || for the last tabledata (10-99 tabledata)
s@(<wiki_tdata[2-9]>\|)(\n)@\1|\2@g; # add || for the last tabledata (2-9 tabledata)
## should be all in-line: tr + td + td … now
s@<wiki_tdata[[:digit:]]+>@@g; # have only Wiki-inline || … ||
s@( +\|\|){1,}\n@\1\n@g; # remove redundant “empty” trailing table data
s@ +\|\|\n@\n@g; # remove last empty table data
# remove empty left rows
/\n(\|- \|\n)/{
:empty_left_trow s@\n(\|- \|\n)@\n@g; tempty_left_trow
}
s@\n(\|-) (\|[^|\n]+)@\n\1\n\2@g; # line break for first column
s@ *\|\|( |νεωλινε)@\n|\1@g; # all || to \n|
## clean up multiple \n and <br /> immediately after |
s@\n\|(νεωλινε|<br */>){1,}@\n| @g;
' "${out}" > "${out2}"

      ###############
      # from here on tr and td should be single rows, sometimes with νεωλινε marker in it all other line breaks shall and should have \n
      ###############
      # Section-specific colouring; sections without an entry get none.
      color_rules=''
      case "${section}" in
        "Document Category - final")
          color_rules='
s@\n\|-(\n\| Access and Benefit-Sharing Document)@\n|- style="background-color: #f4cccc;"\1@;
s@\n\|-(\n\| High level arrangements)@\n|- style="background-color: #f9cb9c;"\1@;
s@\n\|-(\n\| permits for collecting[^\n]+)@\n|- style="background-color: #fff2cc;"\1@;
s@\n\|-(\n\| Research Permission)@\n|- style="background-color: #d9ead3;"\1@;
s@\n\|-(\n\| material transfer agreements[^\n]+)@\n|- style="background-color: #d0e0e3;"\1@;
s@\n\|-(\n\| Transport Documents)@\n|- style="background-color: #c9daf8;"\1@;
s@\n\|-(\n\| Exemption Permission[^\n]+)@\n|- style="background-color: #d9d2e9;"\1@;
'
          ;;
        "Document Types w/o IPR (draft)")
          color_rules=' # set trow to color
s@\n(\|-)(\n\| Access and Benefit-Sharing[^\n]+)@\n\1 style="background-color: #f4cccc;"\2@Ig;
s@\n(\|-)(\n\| High Level Arrangements[^\n]*)@\n\1 style="background-color: #f9cb9c;"\2@Ig;
s@\n(\|-)(\n\| permits for collecting[^\n]*)@\n\1 style="background-color: #fff2cc;"\2@Ig;
s@\n(\|-)(\n\| Research Permission[^\n]*)@\n\1 style="background-color: #d9ead3;"\2@Ig;
s@\n(\|-)(\n\| material transfer agreements[^\n]*)@\n\1 style="background-color: #d0e0e3;"\2@Ig;
s@\n(\|-)(\n\| Transport Document[^\n]*)@\n\1 style="background-color: #c9daf8;"\2@Ig;
s@\n(\|-)(\n\| Exemption Permission[^\n]+)@\n\1 style="background-color: #d9d2e9;"\2@Ig;
# check from 2nd column «status term»: rows with «will be removed»,
# «Term & definition approved» → green #93c47d
# «Term approved, check definition» → yellow #ffd966
# «Review in progress» → yellow #ffd966
# «under Review» → yellow #ffd966
# «will be deleted» → gray #cccccc
# «will be removed» → gray #cccccc
s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( will be (deleted|removed))@\n\1 style="background-color: #cccccc;"\n\3\n\4\5@Ig;
s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term & definition approved)@\n\1\2\n\3\n\4 style="background-color: #93c47d;" |\5@Ig;
s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term approved, check definition)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Review in progress)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( under Review)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
'
          ;;
        "Intellectual Property Document Types (draft)")
          color_rules=' # set trow to color
# check from 2nd column «status term»: rows with «will be removed»,
# «Term approved, check definition» → green #93c47d
# «Review in progress» → orange #ff9900
# «Patent License» → red #ff0000
s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term approved, check definition)@\n\1\2\n\3\n\4 style="background-color: #93c47d;" |\5@Ig;
s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Review in progress)@\n\1\2\n\3\n\4 style="background-color: #ff9900;" |\5@Ig;
# check from 3rd column
s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)([^\n]*Patent License[^\n]*)@\n\1\2\n\3\n\4\5\n\6 style="background-color: #ff0000;" |\7@Ig;
'
          ;;
        "MTA examples")
          color_rules='
s@\n(\| Term approved[^\n]+)@\n| style="background-color: #93c47d;"\1@g;
'
          ;;
        "Typology of Contents (draft)")
          color_rules=' # replaced removed
# check from 3rd column «status term»: rows with «will be removed»,
# «removed» → gray #cccccc
# «replaced» → gray #cccccc
# «under review» → yellow #ffd966 TODO
s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)( replaced| removed)@\n\1 style="background-color: #cccccc;"\n\3\n\4\5\n\6\7@Ig;
s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)( under review)@\n\1\n\3\n\4\5\n\6 style="background-color: #ffd966;"|\7@Ig;
# y green #b6d7a8
# possible green #b6d7a8
# n/a orange #f9cb9c
s@(\| )(Y\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
s@(\| )(possible\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
s@(\| )(n/a\n)@\1style="background-color: #f9cb9c;"| \2@Ig;
# TODO remove color from trow that has gray #cccccc
## /\n\|- style="background-color: #cccccc;"\n/{
## :trow_status_removed
## /\n\|- style="background-color: #cccccc;"\n([^\n]+){1,}/{
## }
## }
'
          ;;
      esac
      if [[ -n "${color_rules}" ]]; then
        printf '# further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m%s\e[0m (section %s) …\n' \
          "${out2}" "${section}"
        sed --regexp-extended --in-place --null-data "${color_rules}" "${out2}"
      fi
      ;;
    *)
      ;;
  esac

  # for clean_up_file in … TODO
  # clean up trailing <br/> within td … TODO
  # remove null character and turn the νεωλινε markers back into newlines
  for cleaned in "${out}" "${out2}"; do
    [[ -e "${cleaned}" ]] || continue
    sed --in-place '
s@\x0@@g;
s@νεωλινε@\n@g; # return \n from νεωλινε as it was before
' "${cleaned}"
  done
}

# NUL-delimited list on fd 3 so that file names with blanks or parentheses
# reach the function unchanged and sed keeps its own stdin.
while IFS= read -r -d '' f <&3; do
  modify_mwt_file "${f}"
done 3< <(find . -maxdepth 1 -iregex '\.[^.]+.tidy.html.mwt$' -print0 | sort -z)
unset IFS
config_of_tidy.txt
accessibility-check: 0 (Tidy Classic) add-meta-charset: no add-xml-decl: no add-xml-space: no alt-text: anchor-as-name: yes ascii-chars: no assume-xml-procins: no bare: no break-before-br: no char-encoding: utf8 clean: no coerce-endtags: yes css-prefix: c custom-tags: no decorate-inferred-ul: no doctype: auto drop-empty-elements: yes drop-empty-paras: yes drop-proprietary-attributes: no enclose-block-text: no enclose-text: no error-file: escape-cdata: no escape-scripts: yes fix-backslash: yes fix-bad-comments: auto fix-style-tags: yes fix-uri: yes force-output: no gdoc: no gnu-emacs: no hide-comments: no indent: no indent-attributes: no indent-cdata: no indent-spaces: 2 indent-with-tabs: no input-encoding: utf8 input-xml: no join-classes: no join-styles: yes keep-tabs: no keep-time: no literal-attributes: no logical-emphasis: no lower-literals: yes markup: yes merge-divs: auto merge-emphasis: yes merge-spans: auto mute: mute-id: no ncr: yes new-blocklevel-tags: new-empty-tags: new-inline-tags: new-pre-tags: newline: LF numeric-entities: no omit-optional-tags: no output-bom: auto output-encoding: utf8 output-file: output-html: no output-xhtml: no output-xml: no preserve-entities: no priority-attributes: punctuation-wrap: no quiet: no quote-ampersand: yes quote-marks: no quote-nbsp: yes repeated-attributes: keep-last replace-color: no show-body-only: no show-errors: 6 show-filename: no show-info: yes show-meta-change: no show-warnings: yes skip-nested: yes sort-attributes: none strict-tags-attributes: no tab-size: 8 tidy-mark: yes uppercase-attributes: no uppercase-tags: no vertical-space: no warn-proprietary-attributes: yes word-2000: no wrap: 0 wrap-asp: yes wrap-attributes: no wrap-jste: yes wrap-php: no wrap-script-literals: no wrap-sections: yes write-back: no