<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE script:module PUBLIC "-//OpenOffice.org//DTD OfficeDocument 1.0//EN" "module.dtd">
-<script:module xmlns:script="http://openoffice.org/2000/script" script:name="Main" script:language="StarBasic">' Matt McCutchen's SuperbChemistry for OpenOffice, version 1
+<script:module xmlns:script="http://openoffice.org/2000/script" script:name="Main" script:language="StarBasic">' Matt McCutchen's SuperbChemistry for OpenOffice, version 2
'
' Applies superscript and subscript formatting to chemical formulas in text.
'
+' Rules:
+' - Quantities [0-9]+ and charges [0-9]*[-+−] are recognized after an element
+' symbol [A-Z][a-z]? or a closing delimiter [])}] . Hyphens are converted
+' into real minus signs.
+' - A charge sign [-+−] is ignored if it is followed by a letter, digit,
+' opening delimiter, or [<>] . (Charges should appear only at the end of a
+' formula, and we want to avoid matching ordinary hyphens in text.)
+' - When digits followed by a charge sign are recognized, the last digit
+' becomes part of the charge and the remaining digits become the quantity.
+' (Charges rarely have absolute value more than 9.)
+' - Exception: If a single digit follows O or a closing delimiter, that digit
+' is always the quantity. (Handle NO3- and Fe(OH)2+. I think oxygen is the
+' only element that frequently has a quantity as part of a +/-1 ion. A group
+' is rarely parenthesized unless it has a quantity.)
+'
' Examples:
' C12345 ==> C_{12345}
' H+ ==> H^+
' Cl- ==> Cl^-
' Fe3+ ==> Fe^{3+}
+' SO42- ==> SO_4^{2-}
' C1232+ ==> C_{123}^{2+}
' N2- ==> N^{2-}
-' Exception for O and ): NO3- ==> NO_3^-, Fe(OH)2- ==> Fe(OH)_2^-
-' But still O12 ==> O_{12}
-' 4+ ==> 4+ (not a superscript by itself)
+' NO3- ==> NO_3^-
+' Fe(OH)2- ==> Fe(OH)_2^-
+' O12 ==> O_{12}
+' y4- ==> y4-
+' x2 ==> x2
+' Foo2 ==> Foo2
+' TI-89 ==> TI-89
' Regular expression replace in the document,
' creating superscripts if superb > 0 or subscripts if superb < 0.
dim rd as object
rd = doc.createReplaceDescriptor()
+rd.SearchCaseSensitive = true
rd.SearchRegularExpression = true
rd.setSearchString(searchStr)
rd.setReplaceString(replaceStr)
' Formats the current document
sub FormatDocument
-' Mark candidate superscripts so we know they follow letters or ).
-SuperbReplace(ThisComponent, "[A-Za-z)][0-9]*[-+−]", "&@l@", 0)
+' Idiom: Match something and tag it on the left or right with @x@
+' for further processing (& in the replacement stands for the matched text).
+' If the replacement text could use backreferences, this would be easier.
+
+' Tag candidate charges (optional digits plus a sign) that follow an element symbol or a closing delimiter )]} with @g@ on the right.
+SuperbReplace(ThisComponent, "([A-Z][a-z]?|[\])}])[0-9]*[-+−]", "&@g@", 0)
+
+' Disqualify + and - in compound words, etc.: if @g@ is followed by a letter, digit, opening delimiter, or < >, prefix it with @G@ and then delete the @G@@g@ pair.
+SuperbReplace(ThisComponent, "@g@[[({A-Za-z0-9<>]", "@G@&", 0)
+SuperbReplace(ThisComponent, "@G@@g@", "", 0)
-' O and ) grab a single digit. Block it off from becoming a superscript.
-SuperbReplace(ThisComponent, "[O)][0-9]", "&@n@", 0)
+' O and )]} grab a single following digit as quantity: the @n@ tag after it keeps the charge step below from taking that digit.
+SuperbReplace(ThisComponent, "[\])}O][0-9]", "&@n@", 0)
-' Real minus signs in superscripts.
-SuperbReplace(ThisComponent, "-@l@", "−@l@", 0)
+' Replace the hyphen of each tagged charge with a real minus sign.
+SuperbReplace(ThisComponent, "-@g@", "−@g@", 0)
-' Make superscripts: at most one digit.
-SuperbReplace(ThisComponent, "[0-9]?[−+]@l@", "@q@&", 1)
+' Make charges (superb = 1): the sign plus at most one preceding digit. The @q@ tag on the left separates that digit from the symbol so the quantity step cannot match it.
+SuperbReplace(ThisComponent, "[0-9]?[−+]@g@", "@q@&", 1)
-' Remove the O and ) markers.
+' Remove the @n@ markers placed after O and )]} so that a multi-digit quantity such as O57 can be tagged whole below.
SuperbReplace(ThisComponent, "@n@", "", 0)
-' Mark off subscripts: as many digits as we can still grab.
-SuperbReplace(ThisComponent, "[A-Za-z)][0-9]+", "&@n@", 0)
+' Tag quantities with @n@: as many digits as we can still grab after a symbol or )]}.
+SuperbReplace(ThisComponent, "([A-Z][a-z]?|[\])}])[0-9]+", "&@n@", 0)
-' Make subscripts.
+' Make quantities (superb = -1): the digits immediately before each @n@ tag.
SuperbReplace(ThisComponent, "[0-9]+@n@", "&", -1)
' Clean up all markers.
-SuperbReplace(ThisComponent, "@[lnq]@", "", 0)
+SuperbReplace(ThisComponent, "@[gGnq]@", "", 0)
end sub