User talk:A.Plank/Permit and Loan Terms (Handbook)

`convert2mediawiki-mwt.sh`

#!/bin/bash
#   ….modified"  # default modifications for all
#   ….modified2" # particular modifications
#   ….modified3" # modifications by hand
# # # # # # # # # # # # # 
# dependency tidy
# dependency pandoc
# dependency sed

# for f in *.html;do 
#   f_new="${f}.mwt"
#   f_htmtidy="${f%.*}.tidy.html"
#   echo "# convert file to ${f_new} …"; pandoc -f html -t mediawiki "$f" > "${f_new}"; 
# done

# better use tidy
IFS=$'\n';  # kept: the second loop further down relies on newline-only word splitting

#######################################
# Tidies one exported HTML file and converts the result to MediaWiki markup.
# Arguments: $1 - path of the HTML file, e.g. ./Name.html
# Outputs:   writes Name.tidy.html, Name.tidy.log and Name.tidy.html.mwt next
#            to the input; progress goes to stdout, failures to stderr
# Returns:   0 on success, 1 if tidy reported errors or pandoc failed
#######################################
convert_html_to_mwt() {
  local f=$1
  local f_htmtidy="${f%.*}.tidy.html"
  local f_htmtidy_log="${f%.*}.tidy.log"
  local f_new="${f_htmtidy}.mwt"
  local tidy_status=0

  echo -e "# convert file to ${f_htmtidy} … \e[34m${f_new}\e[0m ";
  # tidy -export-default-config > config_of_tidy.txt
  tidy -config config_of_tidy.txt "$f" > "$f_htmtidy" 2> "$f_htmtidy_log" || tidy_status=$?
  # tidy exits with 1 when it only has warnings; 2 or more (errors, missing
  # binary, ...) means there is no usable output to hand to pandoc.
  if (( tidy_status >= 2 )); then
    echo "# tidy failed (exit ${tidy_status}) for ${f}, see ${f_htmtidy_log}; file skipped" >&2
    return 1
  fi
  if ! pandoc -f html -t mediawiki "$f_htmtidy" > "${f_new}"; then
    echo "# pandoc failed for ${f_htmtidy}" >&2
    return 1
  fi
}

# NUL-delimited iteration keeps file names intact even if they contain glob
# characters; fd 3 is used so that tidy/pandoc never see the file list on stdin.
while IFS= read -r -d '' -u 3 f; do
  convert_html_to_mwt "$f"
done 3< <( find . -maxdepth 1 -iregex '\.[^.]+.html$' -and -not -iregex '\.[^.]+.tidy.html$' -print0 | sort -z )

for f in $( find . -maxdepth 1 -iregex '\.[^.]+.tidy.html.mwt$' | sort ); # only not modified, original files
  do 
  # Output files derived from the pandoc result ${f}:
  #   .modified  - default modifications applied to every sheet
  #   .modified2 - additional sheet-specific modifications
  f_new=${f}.modified
  f_new2=${f}.modified2
  # f_new3=${f}.modified3   (modifications by hand)

  # Section heading for the wiki page: the file name without "./" and without
  # the ".tidy.html.mwt" suffix, with known sheet names mapped to their titles.
  this_section=$(
    sed -E \
      -e 's@.tidy.html.mwt$@@' \
      -e 's@^./@@g' \
      -e 's@.*(Document Category \(for discussion\)).*@\1@I' \
      -e 's@.*(Document Category[ -–]+final).*@Overview@I' \
      -e 's@Document Types w o IPR[ -–]+[(]*draft[)]*@Document Types w/o IPR (draft)@I' \
      -e 's@Document Types w o IPR[ -–]+[(]*final[)]*@Biodiversity Permit/Contract Typology@I' \
      -e 's@intellectual Property Document Types \(draft\)@Intellectual Property Document Types (draft)@I' \
      -e 's@ReadMe@Introduction@I' \
      -e 's@\((AU specific permits)\)@\1@I' \
      <<< "$f"
  )
  #######################################
  # Maps the path of a pandoc result file to the canonical sheet name that
  # selects the sheet-specific post-processing in the case statement below.
  # Fix: the closing parenthesis after "final" is now optional, so that
  # "Document Category - final" is normalised like "Document Category - (final)".
  # NOTE(review): [ -–] is a range from space up to the en dash and therefore
  # matches far more than space, hyphen and en dash; left unchanged because
  # existing file names may rely on it.
  # Arguments: $1 - path such as "./Name.tidy.html.mwt"
  # Outputs:   canonical sheet name on stdout
  #######################################
  standardize_file_basename() {
    printf '%s\n' "$1" | sed --regexp-extended '
    s@.tidy.html.mwt$@@; s@^./@@g;
    s@.*(Document Category \(for discussion\)).*@\1@I;
    s@.*(Document Category[ -–]+[(]*final[)]*).*@Document Category (final)@I;
    s@Document Types w[ -–]+o IPR[ -–]+[(]*draft[)]*@Document Types without IPR (draft)@I;
    s@Document Types w[ -–]+o IPR[ -–]+[(]*final[)]*@Document Types without IPR (final)@I;
    s@intellectual Property Document Types[ -–]+[(]*draft[)]*@Intellectual Property Document Types (draft)@I;
    s@intellectual Property Document Types[ -–]+[(]*final[)]*@Intellectual Property Document Types (final)@I;
    s@Typology of Contents[ -–]+[(]*draft[)]*@Typology of Contents (draft)@I;
    s@Typology of Contents[ -–]+[(]*final[)]*@Typology of Contents (final)@I;
    s@ReadMe@Readme@I;
    s@\((AU specific permits)\)@\1@I;
    '
  }
  this_standardized_file_basename=$(standardize_file_basename "$f")

  # Title of the wiki page a sheet belongs to; only whole-name matches are
  # mapped, any other name passes through unchanged.
  # this_wiki_site is perhaps unused.
  # (disabled rule: s@\((AU specific permits)\)@\1@I)
  this_wiki_site=$(
    sed -E \
      -e 's@.tidy.html.mwt$@@' \
      -e 's@^./@@g' \
      -e 's@^ *Document Category[ -–]+final *$@The Biodiversity Permit/Contract Typology (Permit and Loan Terms, Handbook)@I' \
      -e 's@^ *Document Types w o IPR[ -–]+[(]*final[)]* *$@The Biodiversity Permit/Contract Typology (Permit and Loan Terms, Handbook)@I' \
      -e 's@^ *ReadMe *$@Permit and Loan Terms (Handbook)@I' \
      -e 's@^ *Typology of Contents[ -–]+[(]*draft[)]* *$@Typology of Legal/Contractual Terms for Biodiversity Specimens (Permit and Loan Terms, Handbook)@' \
      <<< "$f"
  )
  #######################################
  # Builds the HTML anchor that is appended to the section heading.
  # Fix: the closing parenthesis after "draft" is now optional for the
  # "Intellectual Property Document Types" and "Typology of Contents" rules,
  # matching how the same names are recognised elsewhere in this script.
  # Arguments: $1 - path such as "./Name.tidy.html.mwt"
  # Outputs:   <span id="…"></span> for known sheets, nothing for the ReadMe,
  #            and an "unknown_conversion_in_testing_mode_…" anchor otherwise
  #######################################
  section_html_anchor_id() {
    printf '%s\n' "$1" | sed --regexp-extended '
    s@.tidy.html.mwt$@@; s@^./@@g;
    s@.*(AU specific permits).*@<span id="AU_specific_permits"></span>@I;
    s@.*(EU specific permits).*@<span id="EU_specific_permits"></span>@I;
    s@.*(US specific permits).*@<span id="US_specific_permits"></span>@I;
    s@.*(Document Category \(for discussion\)).*@<span id="Document_Category_-_for_discussion"></span>@I;
    s@.*(Document Category - final).*@<span id="Document_Category"></span>@I;
    s@.*(Document Types w o IPR[ -–]+[(]*draft[)]*).*@<span id="Document_Types_without_IPR_draft"></span>@I;
    s@.*(Document Types w o IPR[ -–]+[(]*final[)]*).*@<span id="Document_Types_without_IPR_final"></span>@I;
    s@.*(intellectual Property Document Types[ -–]+[(]*draft[)]*).*@<span id="Intellectual_Property_Document_Types_-_draft"></span>@I;
    s@.*(MTA examples).*@<span id="MTA_examples"></span>@I;
    s@.*(Permit Types \(GGBN\)).*@<span id="Permit_Types_GGBN"></span>@I;
    s@^.*(Typology of Contents[ -–]+[(]*draft[)]*).*$@<span id="Typology_of_Contents"></span>@I;
    s@.*ReadMe.*@@I;
    /^<span/!{ s@ +@_@g; s@^(.+)$@<span id="unknown_conversion_in_testing_mode_\1"></span>@;}
    '
  }
  this_section_html_anchor_id=$(section_html_anchor_id "$f")
  
  # Step 1: write ${f_new} from the pandoc result with the wrapper <div>
  # elements of the exported sheet removed. --null-data makes sed treat the
  # whole file as one record, so the patterns can span line breaks.
  echo -e "# modify file to \e[34m${f_new}\e[0m …"; 
  sed --regexp-extended --null-data '
    s@\|\n<div class="[^ ]+"[^>]+>\n+([^<>\n]*?)\n+</div>@| \1@g; # table cells containers: removed
    s@^<div class="ritz grid-container" dir="ltr">\n(.+)\n+</div>\n*$@\1@g; # wrapping the whole sheet as div-container probably: removed
  ' "$f" > "${f_new}"; 

  # Step 2 (in place on ${f_new}): insert the section heading, fold every
  # multi-line table cell onto one line using the marker νεωλινε for the
  # removed newlines, convert styled <span> elements to <strong>/<s>/<u>/<i>,
  # set typographic quotes and add table/column formatting. The script is
  # double-quoted because the heading is interpolated by the shell, hence
  # the escaped double quotes inside.
  sed --regexp-extended --null-data --in-place "1 i\== ${this_section}${this_section_html_anchor_id} ==\n\n

   # make all td to one line only, mark newline by νεωλινε
    /\n\|[[:blank:]][^|\n]+/{ # td with immediate following content
      :trow2one_line # replace \n to greek letters of newline (as a marker)
        s@(\n\|[[:blank:]][^|\n]+)\n([^\n|]+)@\1νεωλινε\2@; 
      ttrow2one_line;
    }
    /\n\|\n<div[^|\n]+/{ # td with immediate \n and *then* following content, e.g. <div> or anything
      :trow_seems_formatted2one_line # replace \n to greek letters of newline (as a marker)
        s@(\n\|\n<div[^|\n]+)\n([^\n|]+)@\1νεωλινε\2@; 
        s@(\n\|\n<div[^|\n]+)\n\n([^\n|]+)@\1νεωλινενεωλινε\2@; 
        # three newlines become exactly three markers (a stray fragment of the
        # marker used to be written here and survived into the final output)
        s@(\n\|\n<div[^|\n]+)\n\n\n([^\n|]+)@\1νεωλινενεωλινενεωλινε\2@; 
      ttrow_seems_formatted2one_line;
    }
    /\n\|\n<div[^|\n]+/{ # formatted td preprocessed td should be almost in one line
      s@(\n\|)\n(<div[^|\n]+)@\1νεωλινε\2@g;
    }

    # s@(\n\|[^\n|]+)(<br */>\n){1,}@\1\n@g; # remove trailing <br/> (and misplaced at the line end and cell end) 
    # s@(<br */>)\n@\1νεωλινε@g;
    s@(\.\.\.)@…@g;
    s@ &amp; @ \& @g;
  
    # convert general formats
    s@(<span style=\")font-weight:bold;(\">)([^<>]+)(</span>)@<strong>\3</strong>@g
    s@(<span style=\")font-weight:bold;([^\"]+\">)([^<>]+)(</span>)@<strong>\1\2\3\4</strong>@g
    
    s@(<span style=\")text-decoration:line-through;(\">)([^<>]+)(</span>)@<s>\3</s>@g
    s@(<span style=\")text-decoration:line-through;([^\"]+\">)([^<>]+)(</span>)@<s>\1\2\3\4</s>@g
    
    s@(<span style=\")text-decoration:underline;(\">)([^<>]+)(</span>)@<u>\3</u>@g
    # NOTE(review): unlike the bold, line-through and italic rules, the next
    # rule drops the remaining style attributes - confirm that is intended
    s@(<span style=\")text-decoration:underline;([^\"]+\">)([^<>]+)(</span>)@<u>\3</u>@g
    
    s@(<span style=\")font-style:italic;([^\"]+\">)([^<>]+)(</span>)@<i>\1\2\3\4</i>@g
    s@(<span style=\")font-style:italic;(\">)([^<>]+)(</span>)@<i>\3</i>@g
    
    ## # idea: (1) make only in-line wiki table rows (2) remove redundant rows (3) return to right Wiki table syntax
    ## # wiki-tabledata inline formatting: removed
    ## /<div class=\"softmerge-inner\"[^>]+>/,/<\/div>/ {
    ##   s@\|\n<div class=\"softmerge-inner\"[^>]+>[\n]{2}([^\n]+)[\n]{2}</div>@| \1@g; # single-line contents
    ##   # s@\|\n<div class=\"softmerge-inner\"[^>]+>[\n]{2}(.+)[\n]{2}</div>@| \1@g;  # problem: multi-line contents
    ## }
    /νεωλινε<div class=\"[^<>\n|]+>[^\n]+<\/div>/ {
      s@νεωλινε<div class=\"[^<>\n|]+>@@g;
      s@νεωλινε</div>@@g;
    }    ## # wiki-tabledata inline formatting: removed
    ## 
    ## 
    ## #
    ## # s@\|-\n\|[[:blank:]]+[[:digit:]]+[[:blank:]]*(\n\|){1,}(\n\|-)@\2@g; # approach in original formatting: not yet working to remove perfectly redundant table rows
    ## #
    ## 
    ## # marking Wiki-table-syntax and form only single rows of table data (no line break)
    ## s@\n(\|-)@\n<wiki_trow>\1@g;
    ## s@\n(\|\})@\n<wiki_endtable>\1@g;
    ## s@\n(\|)@\n<wiki_tdata>\1@g;
    ## s@\n(<wiki_tdata>)@ \1@g;
    ## #
    ## # make table data as Wiki inline syntax from \n|… \n|… into || … || …
    ## ## s@([[:blank:]]<wiki_tdata>\|)[[:blank:]]@\1| @g;
    ## s@(<wiki_tdata>\|)[[:blank:]]@\1| @g;
    ## s@([[:blank:]]<wiki_tdata>\|)[[:blank:]]*[\n]@\1|\n@g; # fix line end table data
    ## s@(\n<wiki_trow>\|-)([[:blank:]]<wiki_tdata>\|)\|@\1\2@g; # fix first table data: only one |
    ## #
    ## # should be all in-line: tr + td + td … now
    ## s@<wiki_trow>\|- <wiki_tdata>\| +[[:digit:]]+( +<wiki_tdata>\|\|){1,}\n@@g; # remove redundant “empty” table rows
    ## s@<wiki_trow>\|- <wiki_tdata>\|( +<wiki_tdata>\|\|){1,}\n@@g; # remove redundant “empty” table rows
    ## #
    ## # return to Wiki table syntax
    ## s@[[:blank:]]<wiki_tdata>(\|[[:blank:]])@\n\1@g;
    ## s@[[:blank:]]<wiki_tdata>(\|)\|@\n\1@g;
    ## s@<wiki_trow>(\|-)@\1@g;
    ## s@\n<wiki_endtable>(\|\})@\n\1@g;
    ## 
    /\&quot;/,/\&quot;/ { s@&quot;([^\n&]+)&quot;@“\1”@g; } # replace quotes to typographical ones: “…”
    # final formattings
    s@\{\|\n@{| class=\"vertical-align-top booktabledotted\"\n@g;
    # remove all A, B, C headings?
    # format some columns
    s@\| (Repmarks\n)@| style=\"min-width:150px\" | \1@Ig;
    s@\| (suggested improvements\n)@| style=\"min-width:150px\" | \1@Ig;
    " \
    "${f_new}"
    
    
    # for i in {1..30}; do echo "  s@(<wiki_tdata${i}>[^\\n]+)\\n(\|[[:blank:]\\n])@\1\\n<wiki_tdata$((i + 1))>\2@g;"; done
    case "$this_standardized_file_basename" in
    Readme)  
      echo -e "#   further modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
      # ReadMe sheet: the table rows are tagged cell by cell, pulled onto one
      # line each, and the tags are removed again at the end. The result is
      # written to ${f_new2}; ${f_new} is only read.
      readme_script='
        # tag the cells of every row as <wiki_tdata1> … <wiki_tdata5>
        s@\n(\|-\n)(\|[[:blank:]\n])@\n\1<wiki_tdata1>\2@g;
        s@(<wiki_tdata1>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata2>\2@g;
        s@(<wiki_tdata2>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata3>\2@g;
        s@(<wiki_tdata3>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata4>\2@g;
        s@(<wiki_tdata4>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata5>\2@g;
        # join each tagged cell to the line before it
        s@\n(<wiki_tdata[[:digit:]]>\|)([[:blank:]\n])@ \1\2@g;
        # remove index column
        s@(\n|- )<wiki_tdata1>\| [[:digit:]]+ (<wiki_tdata2>)@\1\2@g;
        # remove last empty column
        s@<wiki_tdata3>\|([[:blank:]]*)\n@\1\n@g;
        s@ <wiki_tdata3>\|[[:blank:]]+([^[:blank:]])@: \1@g;
        s@\n\|- <wiki_tdata2>\|[[:blank:]]+([^[:blank:]])@\n\1@g;
        # typographic quotes
        /\&quot;/,/\&quot;/ { s@&quot;([^\n&]+)&quot;@“\1”@g; }
        # put remaining tagged cells back on their own lines, then drop the tags
        s@\n(<wiki_tdata[[:digit:]]>\|)([[:blank:]]+)@\n\1\2@g;
        s@([^\n])(<wiki_tdata[[:digit:]]>\|)@\1\n\2@g;
        s@([\n])<wiki_tdata[[:digit:]]>(\|)@\1\2@g;
      '
      sed -E -z "${readme_script}" "${f_new}" > "${f_new2}"
    ;;
    
    "Document Category (final)"\
    |"Document Category (for discussion)"\
    |"Document Types without IPR (draft)"\
    |"Document Types without IPR (final)"\
    |"Glossary"\
    |"EU specific permits"\
    |"US specific permits"\
    |"Intellectual Property Document Types (draft)"\
    |"Intellectual Property Document Types (final)"\
    |"MTA examples"\
    |"Permit Types (GGBN)"\
    |"Typology of Contents (draft)"\
    |"Typology of Contents (final)"\
    )
      echo -e "#   further modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
      # Generic table clean-up, written to ${f_new2}:
      #  1. tag the cells of every table row as <wiki_tdata1>, <wiki_tdata2>, …
      #  2. pull each row onto a single line (inline || syntax)
      #  3. drop the index column and empty trailing cells or rows
      #  4. return to one cell per line
      # The per-column tagging rules are generated from one template instead of
      # being written out by hand; raise max_table_columns for wider sheets.
      max_table_columns=42
      wiki_tdata_rules=''
      for (( column = 1; column < max_table_columns; column++ )); do
        # rule N: the cell that follows the cell tagged N gets the tag N+1
        printf -v wiki_tdata_rule 's@(<wiki_tdata%d>[^\\n]+)\\n(\\|([[:blank:]\\n]|νεωλινε))@\\1\\n<wiki_tdata%d>\\2@g;' "$column" "$(( column + 1 ))"
        wiki_tdata_rules+="${wiki_tdata_rule}"$'\n'
      done
      sed --regexp-extended --null-data \
        -e '
      s@\n(\|-\n)(\|[[:blank:]\n])@\n\1<wiki_tdata1>\2@g;
      ' \
        -e "${wiki_tdata_rules}" \
        -e '
     /\n(<wiki_tdata[[:digit:]]+>\|)([[:blank:]\n]|νεωλινε)/{
       :wiki_tdata2one_line; 
       s@\n(<wiki_tdata[[:digit:]]+>\|)([[:blank:]\n]|νεωλινε)@ \1\2@g; # convert tabledata to one row
       twiki_tdata2one_line;
     }
   ## remove first column, the index column
     s@(\n|- )<wiki_tdata1>\| [[:digit:]]+ (<wiki_tdata2>)@\1\2@g;
     s@(\n|- )<wiki_tdata1>\| (<wiki_tdata2>)@\1\2@g;
   ## (wiki_tdata2: must be the 1st | Wiki column not || attention!)
     s@(<wiki_tdata[3-9]>\|)([[:blank:]]+|νεωλινε)@\1|\2@g; # add || for 3-9 tabledata
     s@(<wiki_tdata[[:digit:]]{2,}>\|)([[:blank:]]+|νεωλινε)@\1|\2@g; # add || for 10-99 tabledata
     s@(<wiki_tdata[[:digit:]]{2,}>\|)(\n)@\1|\2@g; # add || for the last tabledata (10-99 tabledata)
     s@(<wiki_tdata[2-9]>\|)(\n)@\1|\2@g; # add || for the last tabledata (2-9 tabledata)
   ## should be all in-line: tr + td + td … now
    s@<wiki_tdata[[:digit:]]+>@@g;       # have only Wiki-inline || … || 
    s@( +\|\|){1,}\n@\1\n@g;             # remove redundant “empty” trailing table data
    s@ +\|\|\n@\n@g;                     # remove last empty table data
    # remove empty left rows 
      /\n(\|- \|\n)/{ 
       :empty_left_trow s@\n(\|- \|\n)@\n@g; tempty_left_trow
      }
    s@\n(\|-) (\|[^|\n]+)@\n\1\n\2@g;    # line break for first column
    s@ *\|\|( |νεωλινε)@\n|\1@g;         # all || to \n|
   ## clean up multiple \n and <br /> immediately after |
    s@\n\|(νεωλινε|<br */>){1,}@\n| @g;
      ' "${f_new}" > "${f_new2}" 
      ###############
      # from here on tr and td should be single rows, sometimes with νεωλινε marker in it all other line breaks shall and should have \n
      ###############
      # Sheet-specific colouring, applied in place to ${f_new2}. The sheet name
      # selects one sed script; sheets without a script are left untouched.
      # Colours used for status cells:
      #   «Term & definition approved»       → green   #93c47d
      #   «Term approved, check definition»  → yellow  #ffd966 (green #93c47d on the IPR sheets)
      #   «Review in progress»               → yellow  #ffd966 (orange #ff9900 on the IPR sheets)
      #   «under Review»                     → yellow  #ffd966
      #   «will be deleted» / «will be removed» → gray #cccccc (whole row)
      #   «Patent License»                   → red     #ff0000
      color_script=''
      case "$this_standardized_file_basename" in
        "Document Category (final)" | "Document Category (for discussion)")
          color_script='
            s@\n\|-(\n\| Access and Benefit-Sharing Document)@\n|- style="background-color: #f4cccc;"\1@I;
            s@\n\|-(\n\| High level arrangements)@\n|- style="background-color: #f9cb9c;"\1@I;
            s@\n\|-(\n\| permits for collecting[^\n]+)@\n|- style="background-color: #fff2cc;"\1@I;
            s@\n\|-(\n\| Research Permission)@\n|- style="background-color: #d9ead3;"\1@I;
            s@\n\|-(\n\| material transfer agreements[^\n]+)@\n|- style="background-color: #d0e0e3;"\1@I;
            s@\n\|-(\n\| Transport Documents)@\n|- style="background-color: #c9daf8;"\1@I;
            s@\n\|-(\n\| Exemption Permission[^\n]+)@\n|- style="background-color: #d9d2e9;"\1@I;
          '
          ;;
        "Document Types without IPR (draft)" | "Document Types without IPR (final)")
          color_script='
            # colour the row by the text of its first cell
            s@\n(\|-)(\n\| Access and Benefit-Sharing[^\n]+)@\n\1 style="background-color: #f4cccc;"\2@Ig;
            s@\n(\|-)(\n\| High Level Arrangements[^\n]*)@\n\1 style="background-color: #f9cb9c;"\2@Ig;
            s@\n(\|-)(\n\| Permits for Research[^\n]*)@\n\1 style="background-color: #d9ead3;"\2@Ig;
            s@\n(\|-)(\n\| Research Permission[^\n]*)@\n\1 style="background-color: #d9ead3;"\2@Ig;
            s@\n(\|-)(\n\| material transfer agreements[^\n]*)@\n\1 style="background-color: #d0e0e3;"\2@Ig;
            s@\n(\|-)(\n\| Transport Document[^\n]*)@\n\1 style="background-color: #c9daf8;"\2@Ig;
            # colour by the status term in the 2nd column
            s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( will be (deleted|removed))@\n\1 style="background-color: #cccccc;"\n\3\n\4\5@Ig;
            s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term & definition approved)@\n\1\2\n\3\n\4 style="background-color: #93c47d;" |\5@Ig;
            s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term approved, check definition)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
            s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Review in progress)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
            s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( under Review)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
          '
          ;;
        "Intellectual Property Document Types (draft)" | "Intellectual Property Document Types (final)")
          color_script='
            # colour by the status term in the 2nd column
            s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term approved, check definition)@\n\1\2\n\3\n\4 style="background-color: #93c47d;" |\5@Ig;
            s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Review in progress)@\n\1\2\n\3\n\4 style="background-color: #ff9900;" |\5@Ig;
            # check from 3rd column
            s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)([^\n]*Patent License[^\n]*)@\n\1\2\n\3\n\4\5\n\6 style="background-color: #ff0000;" |\7@Ig;
          '
          ;;
        "MTA examples")
          color_script='
            s@\n(\| Term approved[^\n]+)@\n| style="background-color: #93c47d;"\1@g;
          '
          ;;
      esac
      if [[ -n "${color_script}" ]]; then
        echo -e "#   further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
        sed -E -i -z "${color_script}" "${f_new2}"
      fi
    
      # Colouring for the "Typology of Contents" sheets, applied in place to
      # ${f_new2}. The sed script is kept in a variable so that it can be read
      # (and exercised) separately from the file it is applied to.
      # Fix: the «under review» rule now keeps existing row attributes (\2),
      # like the parallel status rules for the other sheets; they used to be
      # dropped silently.
      typology_color_script='
          # check from 3rd column «status term»:
          #   «removed»                   → gray    #cccccc (whole row)
          #   «replaced»                  → gray    #cccccc (whole row)
          #   «under review»              → yellow  #ffd966 (status cell)
          s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)( replaced| removed)@\n\1 style="background-color: #cccccc;"\n\3\n\4\5\n\6\7@Ig;
          s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)( under review)@\n\1\2\n\3\n\4\5\n\6 style="background-color: #ffd966;"|\7@Ig;
        # y        green  #b6d7a8
        # possible green  #b6d7a8
        # approved green  #b6d7a8
        # n/a      orange #f9cb9c
          s@(\| )(Y\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
          s@(\| )(approved\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
          s@(\| )(possible\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
          s@(\| )(n/a\n)@\1style="background-color: #f9cb9c;"| \2@Ig;
        # TODO remove color from trow that has gray #cccccc
          ## /\n\|- style="background-color: #cccccc;"\n/{
          ##   :trow_status_removed
          ##   /\n\|- style="background-color: #cccccc;"\n([^\n]+){1,}/{
          ##   }  
          ## }
      '
      case "$this_standardized_file_basename" in 
      "Typology of Contents (draft)"\
      |"Typology of Contents (final)"\
      )
      echo -e "#   further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
        sed --regexp-extended --in-place --null-data "${typology_color_script}" "${f_new2}"
      ;;
      esac
    
    ;;

    *) 
    ;;
    esac

    # Final clean-up of the generated files:
    #   - remove NUL characters
    #   - turn every νεωλινε marker back into the newline it stands for
    # ${f_new} is always processed, ${f_new2} only if it exists.
    # TODO: loop over the clean-up files
    # TODO: clean up trailing <br/> within td
    if true; then
      sed -i -e 's@\x0@@g' -e 's@νεωλινε@\n@g' "${f_new}"
    fi
    if [[ -e "${f_new2}" ]]; then
      sed -i -e 's@\x0@@g' -e 's@νεωλινε@\n@g' "${f_new2}"
    fi

done
unset IFS

Generate HTML IDs from list items using sed (for example at sed.js.org/#)

## Permits for collecting aso.

- Authorisaton to enter site
- Collecting permit
- Taking: “incidental take” permit
- Taking: Migratory Bird Treaty Act (MBTA) Special Purpose, Salvage Permit
- Taking: Salvage Permit (e.g., Non-US, US federal, state, local)
- Possessing: Receiving permit
- Exemption evidence

```
* [[#Permits_for_collecting_aso-definition_Authorisaton_to_enter_site|Authorisaton to enter site]] 
* [[#Permits_for_collecting_aso-definition_Collecting_permit|Collecting permit]] 
* [[#Permits_for_collecting_aso-definition_Taking_incidental_take_permit|Taking: “incidental take” permit]] 
* [[#Permits_for_collecting_aso-definition_Taking_Migratory_Bird_Treaty_Act.Special_Purpose._Salvage_Permit|Taking: Migratory Bird Treaty Act (MBTA) Special Purpose, Salvage Permit]] 
* [[#Permits_for_collecting_aso-definition_Taking_Salvage_Permit|Taking: Salvage Permit (e.g., Non-US, US federal, state, local)]] 
* [[#Permits_for_collecting_aso-definition_Possessing_Receiving_permit|Possessing: Receiving permit]] 
* [[#Permits_for_collecting_aso-definition_Exemption_evidence|Exemption evidence]] 

<span id="Permits_for_collecting_aso-definition_Authorisaton_to_enter_site"></span>
<span id="Permits_for_collecting_aso-definition_Collecting_permit"></span>
<span id="Permits_for_collecting_aso-definition_Taking_incidental_take_permit"></span>
<span id="Permits_for_collecting_aso-definition_Taking_Migratory_Bird_Treaty_Act.Special_Purpose._Salvage_Permit"></span>
<span id="Permits_for_collecting_aso-definition_Taking_Salvage_Permit"></span>
<span id="Permits_for_collecting_aso-definition_Possessing_Receiving_permit"></span>
<span id="Permits_for_collecting_aso-definition_Exemption_evidence"></span>
```

```bash
# Turns a list of items ("- Item text", read from standard input) into wiki
# code: for every item a link "[[#ID|Item text]]" followed by the matching
# anchor '<span id="ID"></span>'.
# How the ID is built from the item text:
#   - the leading "- " is removed and the original text is saved in the hold space
#   - a trailing "(…)" part, single quotes, parentheses, typographic quotes
#     and colons are removed; " & " becomes " and ", "/" becomes " or "
#   - the prefix "Permits for collecting aso-definition_" is added
#   - runs of spaces become "_"
#   - every remaining run of punctuation becomes "."; "-" and "_" are
#     protected by temporary Greek placeholder words while this happens
# The link is first written with "…placeholder…" as its label; the saved
# original text then replaces the placeholder, and line breaks are added
# around the <span> element.
sed --regexp-extended '
s@^- +@@;
h; # get original item to hold space
s@^([^()]+) \(.+\)$@\1@g; # modifications: item → Wiki-Code/HTML-ID
s@\x27@@g; # single quote
s@Migratory Bird Treaty Act \(MBTA\)[ ]@Migratory Bird Treaty Act,@;
s@ +\(@-(@g; s@[()“”:]@@g;
s@ +$@@g; s@^ +@@g; s@ \& @ and @; s@/@ or @g;
s@^@Permits for collecting aso-definition_@;
s@ +@_@g;
# no punctation except - and …
s@[-]@στριχψ@g; s@[_]@τιεφστριχψ@g; 
s@[[:punct:]]+@.@g;
s@τιεφστριχψ@_@g; s@στριχψ@-@g; 

s@(.+)@[[#\1|…placeholder…]] <span id="\1"></span>@;

x; # exchange original item and modified Wiki code/HTML-ID
G; # now: original\nmodified
s@([^\n]+)\n(.+)(…placeholder…)@\2\1@; # substitute placeholder by original item
s@<span@\n&@; s@</span>@&\n@; # add some nicer line breaks
'
```

`config_of_tidy.txt`

accessibility-check: 0 (Tidy Classic)
add-meta-charset: no
add-xml-decl: no
add-xml-space: no
alt-text: 
anchor-as-name: yes
ascii-chars: no
assume-xml-procins: no
bare: no
break-before-br: no
char-encoding: utf8
clean: no
coerce-endtags: yes
css-prefix: c
custom-tags: no
decorate-inferred-ul: no
doctype: auto
drop-empty-elements: yes
drop-empty-paras: yes
drop-proprietary-attributes: no
enclose-block-text: no
enclose-text: no
error-file: 
escape-cdata: no
escape-scripts: yes
fix-backslash: yes
fix-bad-comments: auto
fix-style-tags: yes
fix-uri: yes
force-output: no
gdoc: no
gnu-emacs: no
hide-comments: no
indent: no
indent-attributes: no
indent-cdata: no
indent-spaces: 2
indent-with-tabs: no
input-encoding: utf8
input-xml: no
join-classes: no
join-styles: yes
keep-tabs: no
keep-time: no
literal-attributes: no
logical-emphasis: no
lower-literals: yes
markup: yes
merge-divs: auto
merge-emphasis: yes
merge-spans: auto
mute: 
mute-id: no
ncr: yes
new-blocklevel-tags: 
new-empty-tags: 
new-inline-tags: 
new-pre-tags: 
newline: LF
numeric-entities: no
omit-optional-tags: no
output-bom: auto
output-encoding: utf8
output-file: 
output-html: no
output-xhtml: no
output-xml: no
preserve-entities: no
priority-attributes: 
punctuation-wrap: no
quiet: no
quote-ampersand: yes
quote-marks: no
quote-nbsp: yes
repeated-attributes: keep-last
replace-color: no
show-body-only: no
show-errors: 6
show-filename: no
show-info: yes
show-meta-change: no
show-warnings: yes
skip-nested: yes
sort-attributes: none
strict-tags-attributes: no
tab-size: 8
tidy-mark: yes
uppercase-attributes: no
uppercase-tags: no
vertical-space: no
warn-proprietary-attributes: yes
word-2000: no
wrap: 0
wrap-asp: yes
wrap-attributes: no
wrap-jste: yes
wrap-php: no
wrap-script-literals: no
wrap-sections: yes
write-back: no

User talk:A.Plank/Permit and Loan Terms (Handbook)

Contents

`convert2mediawiki-mwt.sh`

`config_of_tidy.txt`

Navigation menu

Personal tools

Namespaces

Variants

Views

Actions

Search

Home

Search

Repositories

Resources

Library

About

Meetings

News and Outreach

Tools