Difference between revisions of "User talk:A.Plank/Permit and Loan Terms (Handbook)"

From GGBN Wiki
Jump to: navigation, search
(+note)
(convert2mediawiki-mwt.sh)
Line 33: Line 33:
 
done
 
done
  
for f in $( find . -maxdepth 1 -iregex '\.[^.]+.tidy.html.mwt$' | sort );do  
+
for f in $( find . -maxdepth 1 -iregex '\.[^.]+.tidy.html.mwt$' | sort ); # only not modified, original files
 +
  do  
 
   f_new="${f}.modified"  # default modifications for all
 
   f_new="${f}.modified"  # default modifications for all
 
   f_new2="${f}.modified2" # particular modifications
 
   f_new2="${f}.modified2" # particular modifications
Line 41: Line 42:
 
     s@.tidy.html.mwt$@@; s@^./@@g;  
 
     s@.tidy.html.mwt$@@; s@^./@@g;  
 
     s@.*(Document Category \(for discussion\)).*@\1@I;
 
     s@.*(Document Category \(for discussion\)).*@\1@I;
     s@Document Types w o IPR \(draft\)@Document Types w/o IPR (draft)@I;
+
    s@.*(Document Category[ -–]+final).*@Overview@I;
 +
     s@Document Types w o IPR[ -–]+[(]*draft[)]*@Document Types w/o IPR (draft)@I;
 +
    s@Document Types w o IPR[ -–]+[(]*final[)]*@Biodiversity Permit/Contract Typology@I;
 
     s@intellectual Property Document Types \(draft\)@Intellectual Property Document Types (draft)@I;
 
     s@intellectual Property Document Types \(draft\)@Intellectual Property Document Types (draft)@I;
 
     s@ReadMe@Introduction@I;
 
     s@ReadMe@Introduction@I;
 
     s@\((AU specific permits)\)@\1@I;
 
     s@\((AU specific permits)\)@\1@I;
 +
    '
 +
  )
 +
  this_standardized_file_basename=$(echo "$f" | sed --regexp-extended '
 +
    s@.tidy.html.mwt$@@; s@^./@@g;
 +
    s@.*(Document Category \(for discussion\)).*@\1@I;
 +
    s@.*(Document Category[ -–]+[(]*final[)]).*@Document Category (final)@I;
 +
    s@Document Types w[ -–]+o IPR[ -–]+[(]*draft[)]*@Document Types without IPR (draft)@I;
 +
    s@Document Types w[ -–]+o IPR[ -–]+[(]*final[)]*@Document Types without IPR (final)@I;
 +
    s@intellectual Property Document Types[ -–]+[(]*draft[)]*@Intellectual Property Document Types (draft)@I;
 +
    s@intellectual Property Document Types[ -–]+[(]*final[)]*@Intellectual Property Document Types (final)@I;
 +
    s@Typology of Contents[ -–]+[(]*draft[)]*@Typology of Contents (draft)@I;
 +
    s@Typology of Contents[ -–]+[(]*final[)]*@Typology of Contents (final)@I;
 +
    s@ReadMe@Readme@I;
 +
    s@\((AU specific permits)\)@\1@I;
 +
    '
 +
  )
 +
 +
  # this_wiki_site is perhaps unused
 +
  this_wiki_site=$(echo "$f" | sed --regexp-extended '
 +
    s@.tidy.html.mwt$@@; s@^./@@g;
 +
    s@^ *Document Category[ -–]+final *$@The Biodiversity Permit/Contract Typology (Permit and Loan Terms, Handbook)@I;
 +
    s@^ *Document Types w o IPR[ -–]+[(]*final[)]* *$@The Biodiversity Permit/Contract Typology (Permit and Loan Terms, Handbook)@I;
 +
    s@^ *ReadMe *$@Permit and Loan Terms (Handbook)@I;
 +
    s@^ *Typology of Contents[ -–]+[(]*draft[)]* *$@Typology of Legal/Contractual Terms for Biodiversity Specimens (Permit and Loan Terms, Handbook)@;
 +
    # s@\((AU specific permits)\)@\1@I;
 
     '
 
     '
 
   )
 
   )
Line 54: Line 82:
 
     s@.*(Document Category \(for discussion\)).*@<span id="Document_Category_-_for_discussion"></span>@I;
 
     s@.*(Document Category \(for discussion\)).*@<span id="Document_Category_-_for_discussion"></span>@I;
 
     s@.*(Document Category - final).*@<span id="Document_Category"></span>@I;
 
     s@.*(Document Category - final).*@<span id="Document_Category"></span>@I;
     s@.*(Document Types w o IPR \(draft\)).*@<span id="Document_Types_w_o_IPR"></span>@I;
+
     s@.*(Document Types w o IPR[ -–]+[(]*draft[)]*).*@<span id="Document_Types_without_IPR_draft"></span>@I;
     s@.*(intellectual Property Document Types \(draft\)).*@<span id="Intellectual_Property_Document_Types_-_draft"></span>@I;
+
    s@.*(Document Types w o IPR[ -–]+[(]*final[)]*).*@<span id="Document_Types_without_IPR_final"></span>@I;
 +
     s@.*(intellectual Property Document Types[ -–]+[(]*draft[)]).*@<span id="Intellectual_Property_Document_Types_-_draft"></span>@I;
 
     s@.*(MTA examples).*@<span id="MTA_examples"></span>@I;
 
     s@.*(MTA examples).*@<span id="MTA_examples"></span>@I;
 
     s@.*(Permit Types \(GGBN\)).*@<span id="Permit_Types_GGBN"></span>@I;
 
     s@.*(Permit Types \(GGBN\)).*@<span id="Permit_Types_GGBN"></span>@I;
     s@.*(Typology of Contents \(draft\)).*@<span id="Typology_of_Contents"></span>@I;
+
     s@^.*(Typology of Contents[ -–]+[(]*draft[)]).*$@<span id="Typology_of_Contents"></span>@I;
 
     s@.*ReadMe.*@@I;
 
     s@.*ReadMe.*@@I;
     # s@.*().*@<span id=""></span>@I;
+
     /^<span/!{ s@ +@_@g; s@^(.+)$@<span id="unknown_conversion_in_testing_mode_\1"></span>@;}
 
     '
 
     '
 
   )
 
   )
Line 101: Line 130:
 
     s@(<span style=\")text-decoration:line-through;([^\"]+\">)([^<>]+)(</span>)@<s>\1\2\3\4</s>@g
 
     s@(<span style=\")text-decoration:line-through;([^\"]+\">)([^<>]+)(</span>)@<s>\1\2\3\4</s>@g
 
      
 
      
     s@(<span style=\")text-decoration:underline;(\">)([^<>]+)(</span>)@<s>\3</s>@g
+
     s@(<span style=\")text-decoration:underline;(\">)([^<>]+)(</span>)@<u>\3</u>@g
     s@(<span style=\")text-decoration:underline;([^\"]+\">)([^<>]+)(</span>)@<s>\3</s>@g
+
     s@(<span style=\")text-decoration:underline;([^\"]+\">)([^<>]+)(</span>)@<u>\3</u>@g
 
      
 
      
 
     s@(<span style=\")font-style:italic;([^\"]+\">)([^<>]+)(</span>)@<i>\1\2\3\4</i>@g
 
     s@(<span style=\")font-style:italic;([^\"]+\">)([^<>]+)(</span>)@<i>\1\2\3\4</i>@g
Line 148: Line 177:
 
     # final formattings
 
     # final formattings
 
     s@\{\|\n@{| class=\"vertical-align-top booktabledotted\"\n@g;
 
     s@\{\|\n@{| class=\"vertical-align-top booktabledotted\"\n@g;
     # remove all A, B, C headings
+
     # remove all A, B, C headings?
 +
    # format some columns
 +
    s@\| (Repmarks\n)@| style=\"min-width:150px\" | \1@Ig;
 +
    s@\| (suggested improvements\n)@| style=\"min-width:150px\" | \1@Ig;
 
     " \
 
     " \
 
     "${f_new}"
 
     "${f_new}"
Line 154: Line 186:
 
      
 
      
 
     # for i in {1..30}; do echo "  s@(<wiki_tdata${i}>[^\\n]+)\\n(\|[[:blank:]\\n])@\1\\n<wiki_tdata$((i + 1))>\2@g;"; done
 
     # for i in {1..30}; do echo "  s@(<wiki_tdata${i}>[^\\n]+)\\n(\|[[:blank:]\\n])@\1\\n<wiki_tdata$((i + 1))>\2@g;"; done
     case "$this_section" in
+
     case "$this_standardized_file_basename" in
 
+
     Readme)   
     ReadMe|Introduction)   
+
       echo -e "#  further modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
       echo -e "#  further modifications → \e[34m${f_new2}\e[0m (section $this_section) …"
 
 
       sed --regexp-extended --null-data  '
 
       sed --regexp-extended --null-data  '
 
       s@\n(\|-\n)(\|[[:blank:]\n])@\n\1<wiki_tdata1>\2@g;
 
       s@\n(\|-\n)(\|[[:blank:]\n])@\n\1<wiki_tdata1>\2@g;
Line 172: Line 203:
 
     s@\n\|- <wiki_tdata2>\|[[:blank:]]+([^[:blank:]])@\n\1@g;
 
     s@\n\|- <wiki_tdata2>\|[[:blank:]]+([^[:blank:]])@\n\1@g;
 
     /\&quot;/,/\&quot;/ { s@&quot;([^\n&]+)&quot;@“\1”@g; }
 
     /\&quot;/,/\&quot;/ { s@&quot;([^\n&]+)&quot;@“\1”@g; }
 +
      s@\n(<wiki_tdata[[:digit:]]>\|)([[:blank:]]+)@\n\1\2@g;
 +
      s@([^\n])(<wiki_tdata[[:digit:]]>\|)@\1\n\2@g;
 +
      s@([\n])<wiki_tdata[[:digit:]]>(\|)@\1\2@g;
 
       ' "${f_new}" > "${f_new2}"
 
       ' "${f_new}" > "${f_new2}"
 
     ;;
 
     ;;
     "Document Category - final"\
+
   
 +
     "Document Category (final)"\
 
     |"Document Category (for discussion)"\
 
     |"Document Category (for discussion)"\
     |"Document Types w/o IPR (draft)"\
+
     |"Document Types without IPR (draft)"\
 +
    |"Document Types without IPR (final)"\
 +
    |"Glossary"\
 
     |"EU specific permits"\
 
     |"EU specific permits"\
 +
    |"US specific permits"\
 
     |"Intellectual Property Document Types (draft)"\
 
     |"Intellectual Property Document Types (draft)"\
     |"MTA examples"|"Permit Types (GGBN)"|"Typology of Contents (draft)"|"US specific permits")
+
    |"Intellectual Property Document Types (final)"\
       echo -e "#  further modifications → \e[34m${f_new2}\e[0m (section $this_section) …"
+
     |"MTA examples"\
 +
    |"Permit Types (GGBN)"\
 +
    |"Typology of Contents (draft)"\
 +
    |"Typology of Contents (final)"\
 +
    )
 +
       echo -e "#  further modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
 
       sed --regexp-extended --null-data  '
 
       sed --regexp-extended --null-data  '
 
       s@\n(\|-\n)(\|[[:blank:]\n])@\n\1<wiki_tdata1>\2@g;
 
       s@\n(\|-\n)(\|[[:blank:]\n])@\n\1<wiki_tdata1>\2@g;
Line 221: Line 264:
 
       s@(<wiki_tdata28>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata29>\2@g;
 
       s@(<wiki_tdata28>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata29>\2@g;
 
       s@(<wiki_tdata29>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata30>\2@g;
 
       s@(<wiki_tdata29>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata30>\2@g;
      s@(<wiki_tdata30>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata31>\2@g;
 
 
       s@(<wiki_tdata30>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata31>\2@g;
 
       s@(<wiki_tdata30>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata31>\2@g;
 
       s@(<wiki_tdata31>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata32>\2@g;
 
       s@(<wiki_tdata31>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata32>\2@g;
Line 261: Line 303:
 
     s@\n\|(νεωλινε|<br */>){1,}@\n| @g;
 
     s@\n\|(νεωλινε|<br */>){1,}@\n| @g;
 
       ' "${f_new}" > "${f_new2}"  
 
       ' "${f_new}" > "${f_new2}"  
    ###############
+
      ###############
    # from here on tr and td should be single rows, sometimes with νεωλινε marker in it all other line breaks shall and should have \n
+
      # from here on tr and td should be single rows, sometimes with νεωλινε marker in it all other line breaks shall and should have \n
    ###############    
+
      ###############
    if [[ "$this_section" == "Document Category - final" ]]; then
+
      case $this_standardized_file_basename in
       echo -e "#  further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (section $this_section) …"
+
      "Document Category (final)"\
      sed --regexp-extended --in-place --null-data '
+
      |"Document Category (for discussion)"\
        s@\n\|-(\n\| Access and Benefit-Sharing Document)@\n|- style="background-color: #f4cccc;"\1@;
+
       )
        s@\n\|-(\n\| High level arrangements)@\n|- style="background-color: #f9cb9c;"\1@;
+
        echo -e "#  further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
        s@\n\|-(\n\| permits for collecting[^\n]+)@\n|- style="background-color: #fff2cc;"\1@;
+
        sed --regexp-extended --in-place --null-data '
        s@\n\|-(\n\| Research Permission)@\n|- style="background-color: #d9ead3;"\1@;
+
          s@\n\|-(\n\| Access and Benefit-Sharing Document)@\n|- style="background-color: #f4cccc;"\1@I;
        s@\n\|-(\n\| material transfer agreements[^\n]+)@\n|- style="background-color: #d0e0e3;"\1@;
+
          s@\n\|-(\n\| High level arrangements)@\n|- style="background-color: #f9cb9c;"\1@I;
        s@\n\|-(\n\| Transport Documents)@\n|- style="background-color: #c9daf8;"\1@;
+
          s@\n\|-(\n\| permits for collecting[^\n]+)@\n|- style="background-color: #fff2cc;"\1@I;
        s@\n\|-(\n\| Exemption Permission[^\n]+)@\n|- style="background-color: #d9d2e9;"\1@;
+
          s@\n\|-(\n\| Research Permission)@\n|- style="background-color: #d9ead3;"\1@I;
       ' "${f_new2}"
+
          s@\n\|-(\n\| material transfer agreements[^\n]+)@\n|- style="background-color: #d0e0e3;"\1@I;
    fi
+
          s@\n\|-(\n\| Transport Documents)@\n|- style="background-color: #c9daf8;"\1@I;
 +
          s@\n\|-(\n\| Exemption Permission[^\n]+)@\n|- style="background-color: #d9d2e9;"\1@I;
 +
        ' "${f_new2}"
 +
      ;;
 +
      esac
 +
 
 +
      # Document Types w/o IPR (draft)
 +
      case $this_standardized_file_basename in
 +
      "Document Types without IPR (draft)"\
 +
      |"Document Types without IPR (final)"\
 +
      )
 +
        echo -e "#  further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
 +
        sed --regexp-extended --in-place --null-data ' # set trow to color
 +
        s@\n(\|-)(\n\| Access and Benefit-Sharing[^\n]+)@\n\1 style="background-color: #f4cccc;"\2@Ig;
 +
        s@\n(\|-)(\n\| High Level Arrangements[^\n]*)@\n\1 style="background-color: #f9cb9c;"\2@Ig;
 +
        s@\n(\|-)(\n\| Permits for Research[^\n]*)@\n\1 style="background-color: #d9ead3;"\2@Ig;
 +
        s@\n(\|-)(\n\| Research Permission[^\n]*)@\n\1 style="background-color: #d9ead3;"\2@Ig;
 +
        s@\n(\|-)(\n\| material transfer agreements[^\n]*)@\n\1 style="background-color: #d0e0e3;"\2@Ig;
 +
        s@\n(\|-)(\n\| Transport Document[^\n]*)@\n\1 style="background-color: #c9daf8;"\2@Ig;
 +
          # check from 2nd column «status term»: rows with «will be removed»,
 +
          #  «Term & definition approved»       → green  #93c47d
 +
          #  «Term approved, check definition»  → yellow  #ffd966
 +
          #  «Review in progress»              → yellow  #ffd966
 +
          #  «under Review»                    → yellow  #ffd966
 +
          #  «will be deleted»                  → gray    #cccccc
 +
          #  «will be removed»                  → gray    #cccccc
 +
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( will be (deleted|removed))@\n\1 style="background-color: #cccccc;"\n\3\n\4\5@Ig;
 +
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term & definition approved)@\n\1\2\n\3\n\4 style="background-color: #93c47d;" |\5@Ig;
 +
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term approved, check definition)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
 +
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Review in progress)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
 +
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( under Review)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
 +
        ' "${f_new2}"
 +
      ;;
 +
      esac
  
    if [[ "$this_section" == "Document Types w/o IPR (draft)" ]]; then
+
      case $this_standardized_file_basename in
       echo -e "#  further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (section $this_section) …"
+
      "Intellectual Property Document Types (draft)"\
      sed --regexp-extended --in-place --null-data ' # set trow to color
+
      |"Intellectual Property Document Types (final)"\
      s@\n(\|-)(\n\| Access and Benefit-Sharing[^\n]+)@\n\1 style="background-color: #f4cccc;"\2@Ig;
+
       )
      s@\n(\|-)(\n\| High Level Arrangements[^\n]*)@\n\1 style="background-color: #f9cb9c;"\2@Ig;
+
        echo -e "#  further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
      s@\n(\|-)(\n\| permits for collecting[^\n]*)@\n\1 style="background-color: #fff2cc;"\2@Ig;
+
        sed --regexp-extended --in-place --null-data ' # set trow to color
      s@\n(\|-)(\n\| Research Permission[^\n]*)@\n\1 style="background-color: #d9ead3;"\2@Ig;
 
      s@\n(\|-)(\n\| material transfer agreements[^\n]*)@\n\1 style="background-color: #d0e0e3;"\2@Ig;
 
      s@\n(\|-)(\n\| Transport Document[^\n]*)@\n\1 style="background-color: #c9daf8;"\2@Ig;
 
      s@\n(\|-)(\n\| Exemption Permission[^\n]+)@\n\1 style="background-color: #d9d2e9;"\2@Ig;
 
 
         # check from 2nd column «status term»: rows with «will be removed»,  
 
         # check from 2nd column «status term»: rows with «will be removed»,  
        #  «Term & definition approved»      → green  #93c47d
+
          #  «Term approved, check definition»  → green  #93c47d
        #  «Term approved, check definition»  → yellow  #ffd966
+
          #  «Review in progress»              → orange #ff9900
        #  «Review in progress»              → yellow #ffd966
+
          «Patent License»                  red    #ff0000
        «under Review»                    yellow  #ffd966
+
         s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term approved, check definition)@\n\1\2\n\3\n\4 style="background-color: #93c47d;" |\5@Ig;
        #  «will be deleted»                  → gray    #cccccc
+
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Review in progress)@\n\1\2\n\3\n\4 style="background-color: #ff9900;" |\5@Ig;
         #  «will be removed»                  → gray    #cccccc
+
        # check from 3rd column
      s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( will be (deleted|removed))@\n\1 style="background-color: #cccccc;"\n\3\n\4\5@Ig;
+
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)([^\n]*Patent License[^\n]*)@\n\1\2\n\3\n\4\5\n\6 style="background-color: #ff0000;" |\7@Ig;
      s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term & definition approved)@\n\1\2\n\3\n\4 style="background-color: #93c47d;" |\5@Ig;
+
        ' "${f_new2}"
      s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term approved, check definition)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
+
      ;;
      s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Review in progress)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
+
      esac
      s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( under Review)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
 
      ' "${f_new2}"
 
    fi
 
  
    if [[ "$this_section" == "Intellectual Property Document Types (draft)" ]]; then
+
       case $this_standardized_file_basename in  
       echo -e "#  further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (section $this_section) …"
+
       "MTA examples"\
      sed --regexp-extended --in-place --null-data ' # set trow to color
+
       )
       # check from 2nd column «status term»: rows with «will be removed»,
+
       echo -e "#  further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
        #  «Term approved, check definition»  → green  #93c47d
+
        sed --regexp-extended --in-place --null-data '
        #  «Review in progress»              → orange  #ff9900
+
          s@\n(\| Term approved[^\n]+)@\n| style="background-color: #93c47d;"\1@g;
        #  «Patent License»                  → red    #ff0000
+
        ' "${f_new2}"
      s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term approved, check definition)@\n\1\2\n\3\n\4 style="background-color: #93c47d;" |\5@Ig;
+
      ;;   
      s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Review in progress)@\n\1\2\n\3\n\4 style="background-color: #ff9900;" |\5@Ig;
+
      esac
       # check from 3rd column
 
      s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)([^\n]*Patent License[^\n]*)@\n\1\2\n\3\n\4\5\n\6 style="background-color: #ff0000;" |\7@Ig;
 
       ' "${f_new2}"
 
    fi
 
 
 
    if [[ "$this_section" == "MTA examples" ]]; then
 
    echo -e "#  further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (section $this_section) …"
 
      sed --regexp-extended --in-place --null-data '
 
        s@\n(\| Term approved[^\n]+)@\n| style="background-color: #93c47d;"\1@g;
 
      ' "${f_new2}"
 
    fi
 
 
      
 
      
    if [[ "$this_section" == "Typology of Contents (draft)" ]]; then
+
      case $this_standardized_file_basename in
    echo -e "#  further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (section $this_section) …"
+
      "Typology of Contents (draft)"\
      sed --regexp-extended --in-place --null-data ' # replaced removed
+
      |"Typology of Contents (final)"\
        # check from 3rd column «status term»: rows with «will be removed»,  
+
      )
        #  «removed»                  → gray    #cccccc
+
      echo -e "#  further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
        #  «replaced»                  → gray    #cccccc
+
        sed --regexp-extended --in-place --null-data ' # replaced removed
        #  «under review»              → yellow  #ffd966 TODO
+
          # check from 3rd column «status term»: rows with «will be removed»,  
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)( replaced| removed)@\n\1 style="background-color: #cccccc;"\n\3\n\4\5\n\6\7@Ig;
+
          #  «removed»                  → gray    #cccccc
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)( under review)@\n\1\n\3\n\4\5\n\6 style="background-color: #ffd966;"|\7@Ig;
+
          #  «replaced»                  → gray    #cccccc
      # y        green  #b6d7a8
+
          #  «under review»              → yellow  #ffd966 TODO
      # possible green  #b6d7a8
+
          s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)( replaced| removed)@\n\1 style="background-color: #cccccc;"\n\3\n\4\5\n\6\7@Ig;
      # n/a      orange #f9cb9c
+
          s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)( under review)@\n\1\n\3\n\4\5\n\6 style="background-color: #ffd966;"|\7@Ig;
        s@(\| )(Y\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
+
        # y        green  #b6d7a8
        s@(\| )(possible\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
+
        # possible green  #b6d7a8
        s@(\| )(n/a\n)@\1style="background-color: #f9cb9c;"| \2@Ig;
+
        # approved green  #b6d7a8
      # TODO remove color from trow that has gray #cccccc
+
        # n/a      orange #f9cb9c
        ## /\n\|- style="background-color: #cccccc;"\n/{
+
          s@(\| )(Y\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
        ##  :trow_status_removed
+
          s@(\| )(approved\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
        ##  /\n\|- style="background-color: #cccccc;"\n([^\n]+){1,}/{
+
          s@(\| )(possible\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
        ##  }   
+
          s@(\| )(n/a\n)@\1style="background-color: #f9cb9c;"| \2@Ig;
        ## }
+
        # TODO remove color from trow that has gray #cccccc
      ' "${f_new2}"
+
          ## /\n\|- style="background-color: #cccccc;"\n/{
    fi
+
          ##  :trow_status_removed
 +
          ##  /\n\|- style="background-color: #cccccc;"\n([^\n]+){1,}/{
 +
          ##  }   
 +
          ## }
 +
        ' "${f_new2}"
 +
      ;;
 +
      esac
 
      
 
      
 
     ;;
 
     ;;
Line 369: Line 432:
 
done
 
done
 
unset IFS
 
unset IFS
 +
</syntaxhighlight>
 +
 +
 +
 +
Generate HTML-IDs from list items using sed.js.org/#
 +
 +
<syntaxhighlight lang="text">
 +
## Permits for collecting aso.
 +
 +
- Authorisaton to enter site
 +
- Collecting permit
 +
- Taking: “incidental take” permit
 +
- Taking: Migratory Bird Treaty Act (MBTA) Special Purpose, Salvage Permit
 +
- Taking: Salvage Permit (e.g., Non-US, US federal, state, local)
 +
- Possessing: Receiving permit
 +
- Exemption evidence
 +
 +
```
 +
* [[#Permits_for_collecting_aso-definition_Authorisaton_to_enter_site|Authorisaton to enter site]]
 +
* [[#Permits_for_collecting_aso-definition_Collecting_permit|Collecting permit]]
 +
* [[#Permits_for_collecting_aso-definition_Taking_incidental_take_permit|Taking: “incidental take” permit]]
 +
* [[#Permits_for_collecting_aso-definition_Taking_Migratory_Bird_Treaty_Act.Special_Purpose._Salvage_Permit|Taking: Migratory Bird Treaty Act (MBTA) Special Purpose, Salvage Permit]]
 +
* [[#Permits_for_collecting_aso-definition_Taking_Salvage_Permit|Taking: Salvage Permit (e.g., Non-US, US federal, state, local)]]
 +
* [[#Permits_for_collecting_aso-definition_Possessing_Receiving_permit|Possessing: Receiving permit]]
 +
* [[#Permits_for_collecting_aso-definition_Exemption_evidence|Exemption evidence]]
 +
 +
<span id="Permits_for_collecting_aso-definition_Authorisaton_to_enter_site"></span>
 +
<span id="Permits_for_collecting_aso-definition_Collecting_permit"></span>
 +
<span id="Permits_for_collecting_aso-definition_Taking_incidental_take_permit"></span>
 +
<span id="Permits_for_collecting_aso-definition_Taking_Migratory_Bird_Treaty_Act.Special_Purpose._Salvage_Permit"></span>
 +
<span id="Permits_for_collecting_aso-definition_Taking_Salvage_Permit"></span>
 +
<span id="Permits_for_collecting_aso-definition_Possessing_Receiving_permit"></span>
 +
<span id="Permits_for_collecting_aso-definition_Exemption_evidence"></span>
 +
```
 +
 +
```bash
 +
sed --regexp-extended '
 +
s@^- +@@;
 +
h; # get original item to hold space
 +
s@^([^()]+) \(.+\)$@\1@g; # modifications: item → Wiki-Code/HTML-ID
 +
s@\x27@@g; # single quote
 +
s@Migratory Bird Treaty Act \(MBTA\)[ ]@Migratory Bird Treaty Act,@;
 +
s@ +\(@-(@g; s@[()“”:]@@g;
 +
s@ +$@@g; s@^ +@@g; s@ \& @ and @; s@/@ or @g;
 +
s@^@Permits for collecting aso-definition_@;
 +
s@ +@_@g;
 +
# no punctation except - and …
 +
s@[-]@στριχψ@g; s@[_]@τιεφστριχψ@g;
 +
s@[[:punct:]]+@.@g;
 +
s@τιεφστριχψ@_@g; s@στριχψ@-@g;
 +
 +
s@(.+)@[[#\1|…placeholder…]] <span id="\1"></span>@;
 +
 +
x; # exchange original item and modified Wiki code/HTML-ID
 +
G; # now: original\nmodified
 +
s@([^\n]+)\n(.+)(…placeholder…)@\2\1@; # substitute placeholder by original item
 +
s@<span@\n&@; s@</span>@&\n@; # add some nicer line breaks
 +
'
 +
```
 
</syntaxhighlight>
 
</syntaxhighlight>
  

Revision as of 17:44, 5 January 2023

Use the HTML export from Google, then use Tidy and Pandoc to convert it to MediaWiki text format, and then apply the following modifications (this is still at a development stage; there is no guarantee of a perfect transliteration)

convert2mediawiki-mwt.sh

#!/bin/bash
#   ….modified"  # default modifications for all
#   ….modified2" # particular modifications
#   ….modified3" # modifications by hand
# # # # # # # # # # # # # 
# dependency tidy
# dependency pandoc
# dependency sed

# for f in *.html;do 
#   f_new="${f}.mwt"
#   f_htmtidy="${f%.*}.tidy.html"
#   echo "# convert file to ${f_new} …"; pandoc -f html -t mediawiki "$f" > "${f_new}"; 
# done

# better use tidy
IFS=$'\n';

# Step 1: run every exported *.html file of the current directory through
# tidy and convert the tidied HTML to MediaWiki text with pandoc:
#   <name>.html → <name>.tidy.html (+ <name>.tidy.log) → <name>.tidy.html.mwt
# Files that already end in .tidy.html are excluded by the second -iregex.
#
# The file list is read NUL-delimited into an array, so this loop neither
# depends on the global IFS nor glob-expands names containing * ? or [
# (both were the case with the unquoted `for f in $(find …)`).
html_sources=()
while IFS= read -r -d '' f; do
  html_sources+=("$f")
done < <(find . -maxdepth 1 -iregex '\.[^.]+.html$' -and -not -iregex '\.[^.]+.tidy.html$' -print0 | sort -z)

for f in "${html_sources[@]}"; do
  f_htmtidy="${f%.*}.tidy.html"
  f_htmtidy_log="${f%.*}.tidy.log"
  f_new="${f_htmtidy}.mwt"
  # printf with %s prints the file names literally; `echo -e` would have
  # interpreted backslash sequences inside them.
  printf '# convert file to %s … \033[34m%s\033[0m \n' "$f_htmtidy" "$f_new"
  # tidy -export-default-config > config_of_tidy.txt
  # tidy diagnostics go to the per-file log, the cleaned HTML to $f_htmtidy
  tidy -config config_of_tidy.txt "$f" > "$f_htmtidy" 2> "$f_htmtidy_log"
  pandoc -f html -t mediawiki "$f_htmtidy" > "${f_new}"
done

for f in $( find . -maxdepth 1 -iregex '\.[^.]+.tidy.html.mwt$' | sort ); # only not modified, original files
  do 
  f_new="${f}.modified"   # default modifications for all
  f_new2="${f}.modified2" # particular modifications
  # f_new3="${f}.modified3" # modifications by hand
  
  # Derive the wiki section title from the file name: strip the leading "./"
  # and the ".tidy.html.mwt" suffix, then rewrite known sheet names
  # (e.g. "ReadMe" → "Introduction", "Document Category - final" → "Overview").
  # All expressions are applied in order to the same string, so their sequence
  # matters; a name that matches none of them passes through unchanged.
  # NOTE(review): "[ -–]" is a bracket *range* from space to en dash, not the
  # three characters blank/hyphen/en dash, so it accepts many more characters
  # — confirm that this is intended.
  this_section=$(echo "$f" | sed --regexp-extended '
    s@.tidy.html.mwt$@@; s@^./@@g; 
    s@.*(Document Category \(for discussion\)).*@\1@I;
    s@.*(Document Category[ -–]+final).*@Overview@I;
    s@Document Types w o IPR[ -–]+[(]*draft[)]*@Document Types w/o IPR (draft)@I;
    s@Document Types w o IPR[ -–]+[(]*final[)]*@Biodiversity Permit/Contract Typology@I;
    s@intellectual Property Document Types \(draft\)@Intellectual Property Document Types (draft)@I;
    s@ReadMe@Introduction@I;
    s@\((AU specific permits)\)@\1@I;
    '
  )
  # Map a source path ("./<name>.tidy.html.mwt") to a canonical sheet name,
  # e.g. "Document Category (final)" or "Document Types without IPR (draft)".
  #   $1      path of the .mwt file
  #   stdout  canonical name; names without a matching rule are only stripped
  #           of the leading "./" and the ".tidy.html.mwt" suffix
  # The rules are applied in order to the same string.
  # Fix: the "Document Category … final" rule used to require a closing ")"
  # ("[)]" instead of "[)]*"), so a file named "Document Category - final"
  # was never normalised to "Document Category (final)".
  standardize_file_basename() {
    printf '%s\n' "$1" | sed --regexp-extended '
    s@.tidy.html.mwt$@@; s@^./@@g;
    s@.*(Document Category \(for discussion\)).*@\1@I;
    s@.*(Document Category[ -–]+[(]*final[)]*).*@Document Category (final)@I;
    s@Document Types w[ -–]+o IPR[ -–]+[(]*draft[)]*@Document Types without IPR (draft)@I;
    s@Document Types w[ -–]+o IPR[ -–]+[(]*final[)]*@Document Types without IPR (final)@I;
    s@intellectual Property Document Types[ -–]+[(]*draft[)]*@Intellectual Property Document Types (draft)@I;
    s@intellectual Property Document Types[ -–]+[(]*final[)]*@Intellectual Property Document Types (final)@I;
    s@Typology of Contents[ -–]+[(]*draft[)]*@Typology of Contents (draft)@I;
    s@Typology of Contents[ -–]+[(]*final[)]*@Typology of Contents (final)@I;
    s@ReadMe@Readme@I;
    s@\((AU specific permits)\)@\1@I;
    '
  }
  this_standardized_file_basename=$(standardize_file_basename "$f")

  # this_wiki_site is perhaps unused (it is not referenced in the visible
  # remainder of this script).
  # Presumably the title of the wiki page a sheet belongs to — TODO confirm.
  # After stripping "./" and ".tidy.html.mwt", each rule is anchored with
  # ^ … $ and therefore only rewrites a name that matches as a whole
  # (leading/trailing blanks allowed); any other name passes through unchanged.
  # NOTE(review): the "Typology of Contents" rule is the only one without the
  # case-insensitive I flag — confirm whether that is deliberate.
  this_wiki_site=$(echo "$f" | sed --regexp-extended '
    s@.tidy.html.mwt$@@; s@^./@@g; 
    s@^ *Document Category[ -–]+final *$@The Biodiversity Permit/Contract Typology (Permit and Loan Terms, Handbook)@I;
    s@^ *Document Types w o IPR[ -–]+[(]*final[)]* *$@The Biodiversity Permit/Contract Typology (Permit and Loan Terms, Handbook)@I;
    s@^ *ReadMe *$@Permit and Loan Terms (Handbook)@I;
    s@^ *Typology of Contents[ -–]+[(]*draft[)]* *$@Typology of Legal/Contractual Terms for Biodiversity Specimens (Permit and Loan Terms, Handbook)@;
    # s@\((AU specific permits)\)@\1@I;
    '
  )
  # Map a source path ("./<name>.tidy.html.mwt") to the HTML anchor
  # (<span id="…"></span>) of its section.
  #   $1      path of the .mwt file
  #   stdout  the anchor element; empty for the ReadMe sheet; for a name
  #           without a matching rule an anchor whose id starts with
  #           "unknown_conversion_in_testing_mode_" followed by the name
  #           with blanks turned into underscores
  # Fix: the "Intellectual Property … draft" and "Typology of Contents … draft"
  # rules required a closing ")" ("[)]"), although the opening "(" is optional
  # and the sibling "Document Types w o IPR" rules use "[)]*"; such names
  # written without parentheses fell through to the "unknown" fallback.
  section_html_anchor_id() {
    printf '%s\n' "$1" | sed --regexp-extended '
    s@.tidy.html.mwt$@@; s@^./@@g;
    s@.*(AU specific permits).*@<span id="AU_specific_permits"></span>@I;
    s@.*(EU specific permits).*@<span id="EU_specific_permits"></span>@I;
    s@.*(US specific permits).*@<span id="US_specific_permits"></span>@I;
    s@.*(Document Category \(for discussion\)).*@<span id="Document_Category_-_for_discussion"></span>@I;
    s@.*(Document Category - final).*@<span id="Document_Category"></span>@I;
    s@.*(Document Types w o IPR[ -–]+[(]*draft[)]*).*@<span id="Document_Types_without_IPR_draft"></span>@I;
    s@.*(Document Types w o IPR[ -–]+[(]*final[)]*).*@<span id="Document_Types_without_IPR_final"></span>@I;
    s@.*(intellectual Property Document Types[ -–]+[(]*draft[)]*).*@<span id="Intellectual_Property_Document_Types_-_draft"></span>@I;
    s@.*(MTA examples).*@<span id="MTA_examples"></span>@I;
    s@.*(Permit Types \(GGBN\)).*@<span id="Permit_Types_GGBN"></span>@I;
    s@^.*(Typology of Contents[ -–]+[(]*draft[)]*).*$@<span id="Typology_of_Contents"></span>@I;
    s@.*ReadMe.*@@I;
    /^<span/!{ s@ +@_@g; s@^(.+)$@<span id="unknown_conversion_in_testing_mode_\1"></span>@;}
    '
  }
  this_section_html_anchor_id=$(section_html_anchor_id "$f")
  
  # First pass, applied to every file: write a cleaned copy of "$f" to
  # "${f_new}". With --null-data sed splits its input on NUL bytes instead of
  # newlines, which is what lets the "\n" in the patterns match line breaks.
  # The two substitutions unwrap the <div class="…"> container that follows a
  # table-cell "|" and strip the <div class="ritz grid-container" dir="ltr">
  # element wrapping the input.
  echo -e "# modify file to \e[34m${f_new}\e[0m …"; 
  sed --regexp-extended --null-data '
    s@\|\n<div class="[^ ]+"[^>]+>\n+([^<>\n]*?)\n+</div>@| \1@g; # table cells containers: removed
    s@^<div class="ritz grid-container" dir="ltr">\n(.+)\n+</div>\n*$@\1@g; # wrapping the whole sheet as div-container probably: removed
  ' "$f" > "${f_new}"; 

  # Second pass, editing ${f_new} in place. --null-data makes sed read
  # NUL-separated records, so (assuming the file contains no NUL bytes) the
  # whole file is one record and the patterns can match across line breaks.
  # Steps, in order:
  #   1. insert the wiki section heading (plus the anchor span) before the
  #      content;
  #   2. fold multi-line table cells into a single line by replacing their
  #      inner line breaks with the marker νεωλινε (the clean-up at the end of
  #      the loop body turns the marker back into a line break);
  #   3. "..." → "…" and " &amp; " → " & ";
  #   4. turn inline-styled <span>s into <strong>, <s>, <u>, <i>; a span that
  #      carries further styles is kept inside the new element;
  #   5. drop <div …> / </div> wrappers that sit on a folded cell line;
  #   6. &quot;…&quot; → “…”;
  #   7. add CSS classes to every table start and a min-width to two header
  #      cells.
  # The script is double-quoted so that the heading variables expand; every
  # literal double quote inside it therefore has to be written as \".
  # NOTE(review): in --null-data mode the inserted heading is presumably
  # terminated by a NUL byte, which the later clean-up strips — confirm.
  sed --regexp-extended --null-data --in-place "1 i\== ${this_section}${this_section_html_anchor_id} ==\n\n

   # make all td to one line only, mark newline by νεωλινε
    /\n\|[[:blank:]][^|\n]+/{ # td with immediate following content
      :trow2one_line # replace \n to greek letters of newline (as a marker)
        s@(\n\|[[:blank:]][^|\n]+)\n([^\n|]+)@\1νεωλινε\2@;
      ttrow2one_line;
    }
    /\n\|\n<div[^|\n]+/{ # td with immediate \n and *then* following content, e.g. <div> or anything
      :trow_seems_formatted2one_line # replace \n to greek letters of newline (as a marker)
        # one marker per replaced line break: 1, 2 or 3 breaks in a row
        s@(\n\|\n<div[^|\n]+)\n([^\n|]+)@\1νεωλινε\2@;
        s@(\n\|\n<div[^|\n]+)\n\n([^\n|]+)@\1νεωλινενεωλινε\2@;
        s@(\n\|\n<div[^|\n]+)\n\n\n([^\n|]+)@\1νεωλινενεωλινενεωλινε\2@;
      ttrow_seems_formatted2one_line;
    }
    /\n\|\n<div[^|\n]+/{ # formatted td preprocessed td should be almost in one line
      s@(\n\|)\n(<div[^|\n]+)@\1νεωλινε\2@g;
    }

    # s@(\n\|[^\n|]+)(<br */>\n){1,}@\1\n@g; # remove trailing <br/> (and misplaced at the line end and cell end)
    # s@(<br */>)\n@\1νεωλινε@g;
    s@(\.\.\.)@…@g;
    s@ &amp; @ \& @g;

    # convert general formats
    # first rule of each pair: the span has only this one style, so the span is replaced
    # other rule of each pair: further styles follow, so the span is kept inside the new element
    s@(<span style=\")font-weight:bold;(\">)([^<>]+)(</span>)@<strong>\3</strong>@g
    s@(<span style=\")font-weight:bold;([^\"]+\">)([^<>]+)(</span>)@<strong>\1\2\3\4</strong>@g

    s@(<span style=\")text-decoration:line-through;(\">)([^<>]+)(</span>)@<s>\3</s>@g
    s@(<span style=\")text-decoration:line-through;([^\"]+\">)([^<>]+)(</span>)@<s>\1\2\3\4</s>@g

    s@(<span style=\")text-decoration:underline;(\">)([^<>]+)(</span>)@<u>\3</u>@g
    # keep the span with its remaining styles inside <u>, as done for bold, line-through and italic
    s@(<span style=\")text-decoration:underline;([^\"]+\">)([^<>]+)(</span>)@<u>\1\2\3\4</u>@g

    s@(<span style=\")font-style:italic;([^\"]+\">)([^<>]+)(</span>)@<i>\1\2\3\4</i>@g
    s@(<span style=\")font-style:italic;(\">)([^<>]+)(</span>)@<i>\3</i>@g

    ## # idea: (1) make only in-line wiki table rows (2) remove redundant rows (3) return to right Wiki table syntax
    ## # wiki-tabledata inline formatting: removed
    ## /<div class=\"softmerge-inner\"[^>]+>/,/<\/div>/ {
    ##   s@\|\n<div class=\"softmerge-inner\"[^>]+>[\n]{2}([^\n]+)[\n]{2}</div>@| \1@g; # single-line contents
    ##   # s@\|\n<div class=\"softmerge-inner\"[^>]+>[\n]{2}(.+)[\n]{2}</div>@| \1@g;  # problem: multi-line contents
    ## }
    /νεωλινε<div class=\"[^<>\n|]+>[^\n]+<\/div>/ {
      s@νεωλινε<div class=\"[^<>\n|]+>@@g;
      s@νεωλινε</div>@@g;
    }    ## # wiki-tabledata inline formatting: removed
    ##
    ##
    ## #
    ## # s@\|-\n\|[[:blank:]]+[[:digit:]]+[[:blank:]]*(\n\|){1,}(\n\|-)@\2@g; # approach in original formatting: not yet working to remove perfectly redundant table rows
    ## #
    ##
    ## # marking Wiki-table-syntax and form only single rows of table data (no line break)
    ## s@\n(\|-)@\n<wiki_trow>\1@g;
    ## s@\n(\|\})@\n<wiki_endtable>\1@g;
    ## s@\n(\|)@\n<wiki_tdata>\1@g;
    ## s@\n(<wiki_tdata>)@ \1@g;
    ## #
    ## # make table data as Wiki inline syntax from \n|… \n|… into || … || …
    ## ## s@([[:blank:]]<wiki_tdata>\|)[[:blank:]]@\1| @g;
    ## s@(<wiki_tdata>\|)[[:blank:]]@\1| @g;
    ## s@([[:blank:]]<wiki_tdata>\|)[[:blank:]]*[\n]@\1|\n@g; # fix line end table data
    ## s@(\n<wiki_trow>\|-)([[:blank:]]<wiki_tdata>\|)\|@\1\2@g; # fix first table data: only one |
    ## #
    ## # should be all in-line: tr + td + td … now
    ## s@<wiki_trow>\|- <wiki_tdata>\| +[[:digit:]]+( +<wiki_tdata>\|\|){1,}\n@@g; # remove redundant “empty” table rows
    ## s@<wiki_trow>\|- <wiki_tdata>\|( +<wiki_tdata>\|\|){1,}\n@@g; # remove redundant “empty” table rows
    ## #
    ## # return to Wiki table syntax
    ## s@[[:blank:]]<wiki_tdata>(\|[[:blank:]])@\n\1@g;
    ## s@[[:blank:]]<wiki_tdata>(\|)\|@\n\1@g;
    ## s@<wiki_trow>(\|-)@\1@g;
    ## s@\n<wiki_endtable>(\|\})@\n\1@g;
    ##
    /\&quot;/,/\&quot;/ { s@&quot;([^\n&]+)&quot;@“\1”@g; } # replace quotes to typographical ones: “…”
    # final formattings
    s@\{\|\n@{| class=\"vertical-align-top booktabledotted\"\n@g;
    # remove all A, B, C headings?
    # format some columns
    # NOTE(review): Repmarks may be a typo for Remarks, check against the sheet header
    s@\| (Repmarks\n)@| style=\"min-width:150px\" | \1@Ig;
    s@\| (suggested improvements\n)@| style=\"min-width:150px\" | \1@Ig;
    " \
    "${f_new}"
    
    
    # for i in {1..30}; do echo "  s@(<wiki_tdata${i}>[^\\n]+)\\n(\|[[:blank:]\\n])@\1\\n<wiki_tdata$((i + 1))>\2@g;"; done
    case "$this_standardized_file_basename" in
    Readme)  
      # Readme sheet only: derive ${f_new2} from ${f_new}.
      # The sed script (--null-data, so patterns may span line breaks)
      #   - tags the first five cells of each table row with temporary
      #     <wiki_tdata1> … <wiki_tdata5> markers,
      #   - joins every tagged cell onto the preceding line with a blank,
      #   - drops the first cell when it holds only a number (index column),
      #   - removes an empty tdata3 cell and otherwise turns its separator
      #     into ": ",
      #   - strips the leading "|- " row marker and the pipe of the tdata2 cell,
      #   - converts &quot;…&quot; pairs to “…”,
      #   - finally puts each remaining marker on a new line and removes it.
      # NOTE(review): in `(\n|- )` the `|` is an ERE alternation (a newline OR
      # "- "), not a literal pipe — confirm that this is intended.
      echo -e "#   further modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
      sed --regexp-extended --null-data  '
      s@\n(\|-\n)(\|[[:blank:]\n])@\n\1<wiki_tdata1>\2@g;
      s@(<wiki_tdata1>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata2>\2@g;
      s@(<wiki_tdata2>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata3>\2@g; 
      s@(<wiki_tdata3>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata4>\2@g;  
      s@(<wiki_tdata4>[^\n]+)\n(\|[[:blank:]\n])@\1\n<wiki_tdata5>\2@g;
      s@\n(<wiki_tdata[[:digit:]]>\|)([[:blank:]\n])@ \1\2@g;
      # remove index column
      s@(\n|- )<wiki_tdata1>\| [[:digit:]]+ (<wiki_tdata2>)@\1\2@g;
    # remove last empty column
      s@<wiki_tdata3>\|([[:blank:]]*)\n@\1\n@g;
      s@ <wiki_tdata3>\|[[:blank:]]+([^[:blank:]])@: \1@g;
    s@\n\|- <wiki_tdata2>\|[[:blank:]]+([^[:blank:]])@\n\1@g;
    /\&quot;/,/\&quot;/ { s@&quot;([^\n&]+)&quot;@“\1”@g; }
      s@\n(<wiki_tdata[[:digit:]]>\|)([[:blank:]]+)@\n\1\2@g;
      s@([^\n])(<wiki_tdata[[:digit:]]>\|)@\1\n\2@g;
      s@([\n])<wiki_tdata[[:digit:]]>(\|)@\1\2@g;
      ' "${f_new}" > "${f_new2}"
    ;;
    
    "Document Category (final)"\
    |"Document Category (for discussion)"\
    |"Document Types without IPR (draft)"\
    |"Document Types without IPR (final)"\
    |"Glossary"\
    |"EU specific permits"\
    |"US specific permits"\
    |"Intellectual Property Document Types (draft)"\
    |"Intellectual Property Document Types (final)"\
    |"MTA examples"\
    |"Permit Types (GGBN)"\
    |"Typology of Contents (draft)"\
    |"Typology of Contents (final)"\
    )
      # Table sheets: derive ${f_new2} from ${f_new}.
      # The sed script (--null-data, so patterns may span line breaks) works in
      # these stages:
      #   1. tag the cells of every row with numbered <wiki_tdataN> markers,
      #      one substitution per cell position (N = 1 … 42);
      #   2. pull every tagged cell onto the row line, so a row is one line;
      #   3. drop the first cell when it is empty or holds only a number
      #      (the index column);
      #   4. rewrite the cell starts from cell 3 on to the inline "||" form
      #      (cell 2 becomes the first column and keeps its single "|"), then
      #      delete all markers;
      #   5. remove trailing empty cells and rows that consist of "|- |" only;
      #   6. split the rows again into "|-" plus one "| cell" per line;
      #   7. collapse νεωλινε markers and <br/> directly after a cell start
      #      into a single blank.
      # The commented-out lines inside the script (partly in German) are
      # earlier attempts at making table data single-line; one of them notes
      # that it only works when the table row is already a single line.
      # NOTE(review): in `(\n|- )` the `|` is an ERE alternation (a newline OR
      # "- "), not a literal pipe — confirm that this is intended.
      echo -e "#   further modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
      sed --regexp-extended --null-data   '
      s@\n(\|-\n)(\|[[:blank:]\n])@\n\1<wiki_tdata1>\2@g;
      
      # /\n\|[[:blank:]][^|\n]+/{
      #   :trow2oneline s@(\n\|[[:blank:]][^|\n]+)\n([^\n|])@\1νεωλινε\2@; # replace \n to greek letters of newline (as a marker)
      #   ttrow2oneline;
      # }
      
      # s@(<br */>)\n@\1@g; # versuche tabledata zu Einzeilern zu machen
      # # geht nur, wenn trow Einzeiler!
      # for i in {31..41}; do echo "  s@(<wiki_tdata${i}>[^\\n]+)\\n(\|([[:blank:]\\n]|νεωλινε))@\1\\n<wiki_tdata$((i + 1))>\2@g;"; done
      s@(<wiki_tdata1>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata2>\2@g;
      s@(<wiki_tdata2>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata3>\2@g;
      s@(<wiki_tdata3>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata4>\2@g;
      s@(<wiki_tdata4>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata5>\2@g;
      s@(<wiki_tdata5>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata6>\2@g;
      s@(<wiki_tdata6>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata7>\2@g;
      s@(<wiki_tdata7>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata8>\2@g;
      s@(<wiki_tdata8>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata9>\2@g;
      s@(<wiki_tdata9>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata10>\2@g;
      s@(<wiki_tdata10>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata11>\2@g;
      s@(<wiki_tdata11>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata12>\2@g;
      s@(<wiki_tdata12>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata13>\2@g;
      s@(<wiki_tdata13>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata14>\2@g;
      s@(<wiki_tdata14>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata15>\2@g;
      s@(<wiki_tdata15>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata16>\2@g;
      s@(<wiki_tdata16>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata17>\2@g;
      s@(<wiki_tdata17>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata18>\2@g;
      s@(<wiki_tdata18>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata19>\2@g;
      s@(<wiki_tdata19>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata20>\2@g;
      s@(<wiki_tdata20>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata21>\2@g;
      s@(<wiki_tdata21>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata22>\2@g;
      s@(<wiki_tdata22>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata23>\2@g;
      s@(<wiki_tdata23>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata24>\2@g;
      s@(<wiki_tdata24>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata25>\2@g;
      s@(<wiki_tdata25>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata26>\2@g;
      s@(<wiki_tdata26>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata27>\2@g;
      s@(<wiki_tdata27>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata28>\2@g;
      s@(<wiki_tdata28>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata29>\2@g;
      s@(<wiki_tdata29>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata30>\2@g;
      s@(<wiki_tdata30>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata31>\2@g;
      s@(<wiki_tdata31>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata32>\2@g;
      s@(<wiki_tdata32>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata33>\2@g;
      s@(<wiki_tdata33>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata34>\2@g;
      s@(<wiki_tdata34>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata35>\2@g;
      s@(<wiki_tdata35>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata36>\2@g;
      s@(<wiki_tdata36>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata37>\2@g;
      s@(<wiki_tdata37>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata38>\2@g;
      s@(<wiki_tdata38>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata39>\2@g;
      s@(<wiki_tdata39>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata40>\2@g;
      s@(<wiki_tdata40>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata41>\2@g;
      s@(<wiki_tdata41>[^\n]+)\n(\|([[:blank:]\n]|νεωλινε))@\1\n<wiki_tdata42>\2@g;
      
     /\n(<wiki_tdata[[:digit:]]+>\|)([[:blank:]\n]|νεωλινε)/{
       :wiki_tdata2one_line; 
       s@\n(<wiki_tdata[[:digit:]]+>\|)([[:blank:]\n]|νεωλινε)@ \1\2@g; # convert tabledata to one row
       twiki_tdata2one_line;
     }
   ## remove first column, the index column
     s@(\n|- )<wiki_tdata1>\| [[:digit:]]+ (<wiki_tdata2>)@\1\2@g;
     s@(\n|- )<wiki_tdata1>\| (<wiki_tdata2>)@\1\2@g;
   ## (wiki_tdata2: must be the 1st | Wiki column not || attention!)
     s@(<wiki_tdata[3-9]>\|)([[:blank:]]+|νεωλινε)@\1|\2@g; # add || for 2-9 tabledata
     s@(<wiki_tdata[[:digit:]]{2,}>\|)([[:blank:]]+|νεωλινε)@\1|\2@g; # add || for 10-99 tabledata
     s@(<wiki_tdata[[:digit:]]{2,}>\|)(\n)@\1|\2@g; # add || for the last tabledata (10-99 tabledata)
     s@(<wiki_tdata[2-9]>\|)(\n)@\1|\2@g; # add || for the last tabledata (2-9 tabledata)
   ## should be all in-line: tr + td + td … now
    s@<wiki_tdata[[:digit:]]+>@@g;       # have only Wiki-inline || … || 
    s@( +\|\|){1,}\n@\1\n@g;             # remove redundant “empty” trailing table data
    s@ +\|\|\n@\n@g;                     # remove last empty table data
    # remove empty left rows 
      /\n(\|- \|\n)/{ 
       :empty_left_trow s@\n(\|- \|\n)@\n@g; tempty_left_trow
      }
    s@\n(\|-) (\|[^|\n]+)@\n\1\n\2@g;    # line break for first column
    s@ *\|\|( |νεωλινε)@\n|\1@g;         # all || to \n|
   ## clean up multiple \n and <br /> immediately after |
    s@\n\|(νεωλινε|<br */>){1,}@\n| @g;
      ' "${f_new}" > "${f_new2}" 
      ###############
      # from here on tr and td should be single rows, sometimes with νεωλινε marker in it all other line breaks shall and should have \n
      ###############
      # Document Category sheets: colour whole table rows in ${f_new2}.
      # Each substitution appends a background-color style to the "|-" row
      # marker of the first row (no g flag) whose first cell starts with the
      # given heading text; the I flag makes the match case-insensitive.
      case $this_standardized_file_basename in 
      "Document Category (final)"\
      |"Document Category (for discussion)"\
      )
        echo -e "#   further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
        sed --regexp-extended --in-place --null-data '
          s@\n\|-(\n\| Access and Benefit-Sharing Document)@\n|- style="background-color: #f4cccc;"\1@I;
          s@\n\|-(\n\| High level arrangements)@\n|- style="background-color: #f9cb9c;"\1@I;
          s@\n\|-(\n\| permits for collecting[^\n]+)@\n|- style="background-color: #fff2cc;"\1@I;
          s@\n\|-(\n\| Research Permission)@\n|- style="background-color: #d9ead3;"\1@I;
          s@\n\|-(\n\| material transfer agreements[^\n]+)@\n|- style="background-color: #d0e0e3;"\1@I;
          s@\n\|-(\n\| Transport Documents)@\n|- style="background-color: #c9daf8;"\1@I;
          s@\n\|-(\n\| Exemption Permission[^\n]+)@\n|- style="background-color: #d9d2e9;"\1@I;
        ' "${f_new2}"
      ;;
      esac

      # Document Types w/o IPR (draft and final): colour rows and status cells
      # in ${f_new2}.
      #   - rows whose first cell starts with one of the category headings get
      #     a background colour on the "|-" row marker (all matches, g flag);
      #   - rows whose 2nd cell starts with "will be deleted/removed" get a
      #     gray row style; anything that followed "|-" on that row line
      #     (capture group 2) is replaced by it;
      #   - for the other status texts only the 2nd cell itself is coloured
      #     and the row line is kept as it was.
      case $this_standardized_file_basename in 
      "Document Types without IPR (draft)"\
      |"Document Types without IPR (final)"\
      )
        echo -e "#   further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
        sed --regexp-extended --in-place --null-data ' # set trow to color
        s@\n(\|-)(\n\| Access and Benefit-Sharing[^\n]+)@\n\1 style="background-color: #f4cccc;"\2@Ig;
        s@\n(\|-)(\n\| High Level Arrangements[^\n]*)@\n\1 style="background-color: #f9cb9c;"\2@Ig;
        s@\n(\|-)(\n\| Permits for Research[^\n]*)@\n\1 style="background-color: #d9ead3;"\2@Ig;
        s@\n(\|-)(\n\| Research Permission[^\n]*)@\n\1 style="background-color: #d9ead3;"\2@Ig;
        s@\n(\|-)(\n\| material transfer agreements[^\n]*)@\n\1 style="background-color: #d0e0e3;"\2@Ig;
        s@\n(\|-)(\n\| Transport Document[^\n]*)@\n\1 style="background-color: #c9daf8;"\2@Ig;
          # check from 2nd column «status term»: rows with «will be removed», 
          #   «Term & definition approved»       → green   #93c47d
          #   «Term approved, check definition»  → yellow  #ffd966
          #   «Review in progress»               → yellow  #ffd966
          #   «under Review»                     → yellow  #ffd966
          #   «will be deleted»                  → gray    #cccccc
          #   «will be removed»                  → gray    #cccccc
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( will be (deleted|removed))@\n\1 style="background-color: #cccccc;"\n\3\n\4\5@Ig;
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term & definition approved)@\n\1\2\n\3\n\4 style="background-color: #93c47d;" |\5@Ig;
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term approved, check definition)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Review in progress)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( under Review)@\n\1\2\n\3\n\4 style="background-color: #ffd966;" |\5@Ig;
        ' "${f_new2}"
      ;;
      esac

      # Intellectual Property Document Types (draft and final): colour single
      # cells in ${f_new2}. The 2nd cell of a row is coloured when it starts
      # with one of the two status texts; the 3rd cell is coloured red when it
      # contains "Patent License". Row lines and other cells stay unchanged.
      case $this_standardized_file_basename in 
      "Intellectual Property Document Types (draft)"\
      |"Intellectual Property Document Types (final)"\
      )
        echo -e "#   further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
        sed --regexp-extended --in-place --null-data ' # set trow to color
        # check from 2nd column «status term»: rows with «will be removed», 
          #   «Term approved, check definition»  → green   #93c47d
          #   «Review in progress»               → orange  #ff9900
          #   «Patent License»                   → red     #ff0000
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Term approved, check definition)@\n\1\2\n\3\n\4 style="background-color: #93c47d;" |\5@Ig;
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)( Review in progress)@\n\1\2\n\3\n\4 style="background-color: #ff9900;" |\5@Ig;
        # check from 3rd column
        s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)([^\n]*Patent License[^\n]*)@\n\1\2\n\3\n\4\5\n\6 style="background-color: #ff0000;" |\7@Ig;
        ' "${f_new2}"
      ;;
      esac

      # MTA examples: in ${f_new2}, every cell line that starts with
      # "| Term approved" is prefixed with a green background-color cell style
      # (the match is case-sensitive here, there is no I flag).
      case $this_standardized_file_basename in 
      "MTA examples"\
      )
      echo -e "#   further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
        sed --regexp-extended --in-place --null-data '
          s@\n(\| Term approved[^\n]+)@\n| style="background-color: #93c47d;"\1@g;
        ' "${f_new2}"
      ;;    
      esac
    
      # Typology of Contents (draft and final): colour rows and cells in
      # ${f_new2}.
      #   - rows whose 3rd cell starts with "replaced" or "removed" get a gray
      #     row style in place of whatever followed "|-" on the row line;
      #   - a 3rd cell starting with "under review" is coloured yellow;
      #   - cells whose whole text is Y, approved or possible become green,
      #     cells whose whole text is n/a become orange (case-insensitive).
      # NOTE(review): the "under review" rule captures the rest of the row
      # line as group 2 but does not write it back, unlike the comparable
      # status rules for the other sheets — confirm that dropping it is
      # intended.
      case $this_standardized_file_basename in 
      "Typology of Contents (draft)"\
      |"Typology of Contents (final)"\
      )
      echo -e "#   further \e[34mc\e[0m\e[33mo\e[0m\e[32ml\e[0m\e[31mo\e[0m\e[34mr\e[0m modifications → \e[34m${f_new2}\e[0m (file $this_standardized_file_basename, section $this_section) …"
        sed --regexp-extended --in-place --null-data ' # replaced removed
          # check from 3rd column «status term»: rows with «will be removed», 
          #   «removed»                   → gray    #cccccc
          #   «replaced»                  → gray    #cccccc
          #   «under review»              → yellow  #ffd966 TODO
          s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)( replaced| removed)@\n\1 style="background-color: #cccccc;"\n\3\n\4\5\n\6\7@Ig;
          s@\n(\|-)([^\n]*)\n(\|[^\n]*)\n(\|)([^\n]*)\n(\|)( under review)@\n\1\n\3\n\4\5\n\6 style="background-color: #ffd966;"|\7@Ig;
        # y        green  #b6d7a8
        # possible green  #b6d7a8
        # approved green  #b6d7a8
        # n/a      orange #f9cb9c
          s@(\| )(Y\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
          s@(\| )(approved\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
          s@(\| )(possible\n)@\1style="background-color: #b6d7a8;"| \2@Ig;
          s@(\| )(n/a\n)@\1style="background-color: #f9cb9c;"| \2@Ig;
        # TODO remove color from trow that has gray #cccccc
          ## /\n\|- style="background-color: #cccccc;"\n/{
          ##   :trow_status_removed
          ##   /\n\|- style="background-color: #cccccc;"\n([^\n]+){1,}/{
          ##   }  
          ## }
        ' "${f_new2}"
      ;;
      esac
    
    ;;

    *) 
    ;;
    esac

    # for clean_up_file in … TODO
    # clean up trailing <br/> within td … TODO
    # remove null character
   # Final clean-up, line by line this time (no --null-data): delete NUL bytes
   # and turn every νεωλινε marker back into a real line break.
   # ${f_new} is always processed; ${f_new2} only when it was created.
   sed --in-place -e 's@\x0@@g' -e 's@νεωλινε@\n@g' "${f_new}"
   if [[ -e "${f_new2}" ]]; then
     sed --in-place -e 's@\x0@@g' -e 's@νεωλινε@\n@g' "${f_new2}"
   fi

done
unset IFS


Generate HTML-IDs from list items using sed.js.org/#

## Permits for collecting aso.

- Authorisaton to enter site
- Collecting permit
- Taking: “incidental take” permit
- Taking: Migratory Bird Treaty Act (MBTA) Special Purpose, Salvage Permit
- Taking: Salvage Permit (e.g., Non-US, US federal, state, local)
- Possessing: Receiving permit
- Exemption evidence

```
* [[#Permits_for_collecting_aso-definition_Authorisaton_to_enter_site|Authorisaton to enter site]] 
* [[#Permits_for_collecting_aso-definition_Collecting_permit|Collecting permit]] 
* [[#Permits_for_collecting_aso-definition_Taking_incidental_take_permit|Taking: “incidental take” permit]] 
* [[#Permits_for_collecting_aso-definition_Taking_Migratory_Bird_Treaty_Act.Special_Purpose._Salvage_Permit|Taking: Migratory Bird Treaty Act (MBTA) Special Purpose, Salvage Permit]] 
* [[#Permits_for_collecting_aso-definition_Taking_Salvage_Permit|Taking: Salvage Permit (e.g., Non-US, US federal, state, local)]] 
* [[#Permits_for_collecting_aso-definition_Possessing_Receiving_permit|Possessing: Receiving permit]] 
* [[#Permits_for_collecting_aso-definition_Exemption_evidence|Exemption evidence]] 

<span id="Permits_for_collecting_aso-definition_Authorisaton_to_enter_site"></span>
<span id="Permits_for_collecting_aso-definition_Collecting_permit"></span>
<span id="Permits_for_collecting_aso-definition_Taking_incidental_take_permit"></span>
<span id="Permits_for_collecting_aso-definition_Taking_Migratory_Bird_Treaty_Act.Special_Purpose._Salvage_Permit"></span>
<span id="Permits_for_collecting_aso-definition_Taking_Salvage_Permit"></span>
<span id="Permits_for_collecting_aso-definition_Possessing_Receiving_permit"></span>
<span id="Permits_for_collecting_aso-definition_Exemption_evidence"></span>
```

```bash
# Reads list items ("- item", one per line) on standard input. For each item
# it prints a wiki link "[[#ID|item]] " and, on the next line, the matching
# anchor <span id="ID"></span>.
# The ID is built from the item text as follows:
#   - the leading "- " is removed and the remaining text is saved in the hold
#     space for later use as the link label;
#   - a single parenthesised part at the end of the item is cut off;
#   - single quotes are deleted, "(MBTA) " is replaced by a comma, and
#     parentheses, typographic quotes and colons are deleted;
#   - " & " becomes " and ", "/" becomes " or ";
#   - the prefix "Permits for collecting aso-definition_" is prepended and
#     runs of blanks become "_";
#   - "-" and "_" are protected by temporary Greek marker words while every
#     other run of punctuation is collapsed to ".", then restored. The "_"
#     marker contains the "-" marker, so it has to be restored first.
# The label is put back in place of the "…placeholder…" text at the end.
sed --regexp-extended '
s@^- +@@;
h; # get original item to hold space
s@^([^()]+) \(.+\)$@\1@g; # modifications: item → Wiki-Code/HTML-ID
s@\x27@@g; # single quote
s@Migratory Bird Treaty Act \(MBTA\)[ ]@Migratory Bird Treaty Act,@;
s@ +\(@-(@g; s@[()“”:]@@g;
s@ +$@@g; s@^ +@@g; s@ \& @ and @; s@/@ or @g;
s@^@Permits for collecting aso-definition_@;
s@ +@_@g;
# no punctation except - and …
s@[-]@στριχψ@g; s@[_]@τιεφστριχψ@g; 
s@[[:punct:]]+@.@g;
s@τιεφστριχψ@_@g; s@στριχψ@-@g; 

s@(.+)@[[#\1|…placeholder…]] <span id="\1"></span>@;

x; # exchange original item and modified Wiki code/HTML-ID
G; # now: original\nmodified
s@([^\n]+)\n(.+)(…placeholder…)@\2\1@; # substitute placeholder by original item
s@<span@\n&@; s@</span>@&\n@; # add some nicer line breaks
'
```

config_of_tidy.txt

accessibility-check: 0 (Tidy Classic)
add-meta-charset: no
add-xml-decl: no
add-xml-space: no
alt-text: 
anchor-as-name: yes
ascii-chars: no
assume-xml-procins: no
bare: no
break-before-br: no
char-encoding: utf8
clean: no
coerce-endtags: yes
css-prefix: c
custom-tags: no
decorate-inferred-ul: no
doctype: auto
drop-empty-elements: yes
drop-empty-paras: yes
drop-proprietary-attributes: no
enclose-block-text: no
enclose-text: no
error-file: 
escape-cdata: no
escape-scripts: yes
fix-backslash: yes
fix-bad-comments: auto
fix-style-tags: yes
fix-uri: yes
force-output: no
gdoc: no
gnu-emacs: no
hide-comments: no
indent: no
indent-attributes: no
indent-cdata: no
indent-spaces: 2
indent-with-tabs: no
input-encoding: utf8
input-xml: no
join-classes: no
join-styles: yes
keep-tabs: no
keep-time: no
literal-attributes: no
logical-emphasis: no
lower-literals: yes
markup: yes
merge-divs: auto
merge-emphasis: yes
merge-spans: auto
mute: 
mute-id: no
ncr: yes
new-blocklevel-tags: 
new-empty-tags: 
new-inline-tags: 
new-pre-tags: 
newline: LF
numeric-entities: no
omit-optional-tags: no
output-bom: auto
output-encoding: utf8
output-file: 
output-html: no
output-xhtml: no
output-xml: no
preserve-entities: no
priority-attributes: 
punctuation-wrap: no
quiet: no
quote-ampersand: yes
quote-marks: no
quote-nbsp: yes
repeated-attributes: keep-last
replace-color: no
show-body-only: no
show-errors: 6
show-filename: no
show-info: yes
show-meta-change: no
show-warnings: yes
skip-nested: yes
sort-attributes: none
strict-tags-attributes: no
tab-size: 8
tidy-mark: yes
uppercase-attributes: no
uppercase-tags: no
vertical-space: no
warn-proprietary-attributes: yes
word-2000: no
wrap: 0
wrap-asp: yes
wrap-attributes: no
wrap-jste: yes
wrap-php: no
wrap-script-literals: no
wrap-sections: yes
write-back: no