# Schema prologue: namespace prefixes, the included MathML schema with local
# extensions, the role-map attribute hooks, the start pattern and the shared
# attribute bundles used by the element patterns.
#
# Each declaration sits on its own line: a '#' comment in RELAX NG compact
# syntax extends to the end of the line, so a comment must end before the
# next declaration begins.

namespace NoNS = ""
namespace pdf1 = "http://iso.org/pdf/ssn"
namespace pdf2 = "http://iso.org/pdf2/ssn"
namespace mml = "http://www.w3.org/1998/Math/MathML"

# These Namespaces to be confirmed. "data:,Layout" would be another possibility
namespace Layout = "http://iso.org/pdf/ssn/Layout"
namespace PrintField = "http://iso.org/pdf/ssn/PrintField"
namespace Table = "http://iso.org/pdf/ssn/Table"
namespace List = "http://iso.org/pdf/ssn/List"
namespace Artifact = "http://iso.org/pdf/ssn/Artifact"

# XHTML Namespace, not currently used
namespace h = "http://www.w3.org/1999/xhtml"

# slightly modified MathML-Core schema.
# The included grammar's own start pattern is disabled; the start pattern
# of this schema is defined further down.
include "latex-mathml.rnc" {start |= notAllowed}

# Extensions

## MathML Namespace (temp actualtext moved to content element in new code)
mo.attributes &= attribute actualtext {text}?

# Additional alternative accepted for mpadded-length-percentage:
# a signed number followed by a unit (or %), or a bare 0.
mpadded-length-percentage |= xsd:string {
  pattern = '\s*(([\-+]?[0-9]*([0-9]\.?|\.[0-9])[0-9]*(r?em|ex|in|cm|mm|p[xtc]|Q|v[hw]|vmin|vmax|%))|0)\s*'
}

# Attributes When Rolemaps are needed for PDF2 elements used in PDF1
# "notAllowed?" is equivalent to empty, so these hooks add nothing here.
# NOTE(review): presumably redefined by the pdf1.7 variant of this schema - confirm.
pdf1rolemap-Any = notAllowed?
pdf1rolemap-P = notAllowed?
pdf1rolemap-Span = notAllowed?
pdf1rolemap-Note = notAllowed?

# UA-2 Single Document element root.
start = Document | DocumentFragment

## share with pdf1.7 version from here to end
textorHTML &= (Link|Reference|Strong|Code|Em|Lbl|Span|math|Quote|RB|Annot|Figure|Form)*

### PDF structure Namespace
pdf2-attributes = structure-properties, layout-attributes, otherns-attributes, showtags-attributes

# Optional no-namespace attributes: af, rolemapped-from, referenced-as.
showtags-attributes =
  attribute af {text}?,
  attribute rolemapped-from {text}?,
  attribute referenced-as {text}?

# Standard Attribute Owner Namespaces (such as CSS3, HTML-5.00) apart from the ones listed here
# and also vendor-specific namespaced attributes are allowed with any name/value.
# Any number of attributes, with arbitrary text values, whose names lie outside
# the no-namespace and the Layout, PrintField, Table, List and Artifact
# namespaces (those are validated by the dedicated patterns).
otherns-attributes = attribute (* - (NoNS:*|Layout:*|PrintField:*|Table:*|List:*|Artifact:*)) {text}*

# Properties are modelled as attributes in no namespace
structure-properties =
  attribute lang {text}?,               # Lang
  attribute expanded {text}?,           # E
  attribute actualtext {text}?,         # ActualText
  attribute alt {text}?,                # Alt
  attribute title {text}?,              # T
  attribute id {text}?,                 # ID
  attribute phoneme {text}?,            # Phoneme
  attribute phonetic-alphabet {text}?,  # PhoneticAlphabet
  attribute revision {text}?            # R

# NOTE(review): each lone '#' below is treated as a blank separator between
# attribute groups, i.e. the attribute following it is active, not commented
# out (the other reading would leave a trailing comma after ColumnGap) - confirm.
layout-attributes =
  # Only validate Layout attributes with Owner Layout
  # PDF 2 states Attributes in this category shall be defined in attribute objects whose O
  # (owner) entry has the value Layout or whose owner is any other owner excluding List, Table,
  # PrintField and Artifact.
  #
  attribute Layout:Placement {"Block" | "Inline" | "Before" | "Start" | "End"}?,
  attribute Layout:WritingMode {"LrTb" | "RlTb" | "TbRl" | "TbLr" | "LrBt" | "RlBt" | "BtRl" | "BtLr"}?,
  attribute Layout:BackgroundColor {text}?,
  attribute Layout:BorderColor {text}?,
  attribute Layout:BorderStyle {text}?,
  attribute Layout:BorderThickness {text}?,
  attribute Layout:Color {text}?,
  attribute Layout:Padding {text}?,
  #
  attribute Layout:SpaceBefore {text}?,
  attribute Layout:SpaceAfter {text}?,
  attribute Layout:StartIndent {text}?,
  attribute Layout:EndIndent {text}?,
  #
  attribute Layout:TextIndent {text}?,
  attribute Layout:TextAlign { "Start" | "Center" | "End" | "Justify"}?,
  attribute Layout:BBox {text}?, # see apryse PDFA example
  #
  attribute Layout:LineHeight {text}?,
  attribute Layout:BaselineShift {text}?,
  attribute Layout:TextDecorationType {"" | "Underline" | "Overline" | "LineThrough"}?,
  attribute Layout:TextPosition {"Sup" | "Sub" | "Normal"}?,
  attribute Layout:TextDecorationColor {text}?,
  attribute Layout:TextDecorationThickness {text}?,
  #
  attribute Layout:ColumnCount {text}?,
  attribute Layout:ColumnWidths {text}?,
  attribute Layout:ColumnGap {text}?,
  #
  attribute Layout:GlyphOrientationVertical {text}?
# Owner-specific attribute bundles followed by one pattern per structure
# element. Every element pattern lists the attributes it accepts and then the
# children it may contain. Where an element has two content alternatives, the
# one without text is marked "Grouping" and the text-bearing one "Block".
#
# Each definition sits on its own line(s): a '#' comment extends to the end of
# the line, so a comment must end before the next definition begins.

# Attributes in the List namespace.
list-attributes =
  attribute List:ListNumbering {"" | "Unordered" | "Description" | "Disc" | "Circle" | "Square" | "Ordered" | "Decimal" | "UpperRoman" | "LowerRoman" | "UpperAlpha" | "LowerAlpha"}?,
  attribute List:ContinuedList {text}?,
  attribute List:ContinuedFrom {text}?

# Attributes in the PrintField namespace (both Checked and checked are accepted).
printfield-attributes =
  attribute PrintField:Role {text}?,
  attribute PrintField:Checked {text}?,
  attribute PrintField:checked {text}?,
  attribute PrintField:Desc {text}?

# Attributes in the Table namespace.
table-attributes =
  attribute Table:RowSpan {text}?,
  attribute Table:ColSpan {text}?,
  attribute Table:Headers {text}?,
  attribute Table:Summary {text}?

# Attributes in the Artifact namespace.
artifact-attributes =
  attribute Artifact:Type {text}?,
  attribute Artifact:BBox {text}?,
  attribute Artifact:SubType {text}?

# Children of Document/DocumentFragment: document.n may repeat, document.1 at most once.
document.n = (Document|DocumentFragment|Part|Art|Div|Sect|TOC|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Title|Link|Annot|Form|FENote|Index|L|Table|Figure|Formula|Artifact)

document.1 = H?

# Any numbered heading.
Hn = H1|H2|H3|H4|H5|H6|H7

### Document Level
Document = element pdf2:Document {
  pdf2-attributes,
  ((document.1) & (document.n)*)
}

DocumentFragment = element pdf2:DocumentFragment {
  pdf1rolemap-Any,
  pdf2-attributes,
  ((document.1) & (document.n)*)
}

### Grouping
Part = element pdf2:Part {
  pdf2-attributes,
  (document.n|H|TOCI|Sub|Lbl|Reference|BibEntry|Caption)*
}

Sect = element pdf2:Sect {
  pdf2-attributes,
  (H? & (DocumentFragment|Part|Art|Div|Sect|TOC|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Title|Lbl|Link|Annot|Form|FENote|Index|L|Table|Caption|Figure|Formula|Artifact)*)
}

Aside = element pdf2:Aside {
  pdf1rolemap-Any,
  pdf2-attributes,
  (H? & Caption? & (text|Document|DocumentFragment|Part|Art|Div|Sect|TOC|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Lbl|Link|Reference|Annot|Form|FENote|Index|L|Table|Figure|Formula|Artifact)*)
}

NonStruct = element pdf2:NonStruct {
  pdf2-attributes,
  (text|Document|DocumentFragment|Part|Art|Div|Sect|TOC|TOCI|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|H|Title|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|RB|RT|RP|Warichu|WT|WP|FENote|Index|L|LI|LBody|BibEntry|Table|TR|TH|TD|THead|TBody|TFoot|Caption|Figure|Formula|Artifact)*
}

Div = element pdf2:Div {
  pdf2-attributes,
  (Document|DocumentFragment|Part|Art|Div|Sect|TOC|TOCI|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|H|Title|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|RB|RT|RP|Warichu|WT|WP|FENote|Index|L|LI|LBody|BibEntry|Table|TR|TH|TD|THead|TBody|TFoot|Caption|Figure|Formula|Artifact)*
}

## Block
P = element pdf2:P {
  pdf2-attributes,
  (text|NonStruct|Private|Note|Code|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|L|BibEntry|Table|Figure|Formula|Artifact)*
}

# Shared attributes and content of all heading elements (H, H1..H7).
sechead.content =
  pdf2-attributes,
  (Art? & Sect? & (text|NonStruct|Private|Note|Code|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|BibEntry|Figure|Formula|Artifact)*)

H1 = element pdf2:H1 {sechead.content}
H2 = element pdf2:H2 {sechead.content}
H3 = element pdf2:H3 {sechead.content}
H4 = element pdf2:H4 {sechead.content}
H5 = element pdf2:H5 {sechead.content}
H6 = element pdf2:H6 {sechead.content}
H7 = element pdf2:H7 { pdf1rolemap-Any, sechead.content}
H = element pdf2:H {sechead.content}

Title = element pdf2:Title {
  pdf1rolemap-P,
  pdf2-attributes,
  (text|Part|Div|Aside|NonStruct|Private|P|Note|Code|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|L|BibEntry|Table|Caption|Figure|Formula|Artifact)*
}

# "notAllowed?" is equivalent to empty: no FENote-specific attributes are accepted here.
fenote-attributes = notAllowed?

FENote = element pdf2:FENote {
  pdf1rolemap-Note,
  pdf2-attributes,
  fenote-attributes,
  (
    (DocumentFragment|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Sub|Lbl|Link|Reference|Annot|Form|Ruby|Warichu|FENote|L|Table|Caption|Figure|Formula|Artifact)* # Grouping
    |
    (text|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|L|Table|Figure|Formula|Artifact)* # Block
  )
}

BibEntry = element pdf2:BibEntry {
  pdf2-attributes,
  (text|Part|Div|NonStruct|Private|P|Note|Lbl|Em|Strong|Span|Link|Reference|Annot|FENote|Figure|Artifact)*
}

### Sub Block
Sub = element pdf2:Sub {
  pdf1rolemap-Any,
  pdf2-attributes,
  (text|NonStruct|Private|Note|Code|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|L|BibEntry|Figure|Formula|Artifact)*
}

### Inline
Lbl = element pdf2:Lbl {
  pdf2-attributes,
  (text|NonStruct|Private|Note|Code|Sub|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|BibEntry|Figure|Formula|Artifact|math)*
}

Em = element pdf2:Em {
  pdf1rolemap-Span,
  pdf2-attributes,
  (text|NonStruct|Private|Note|Code|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|BibEntry|Figure|Formula|Artifact)*
}

Strong = element pdf2:Strong {
  pdf1rolemap-Span,
  pdf2-attributes,
  (text|NonStruct|Private|Note|Code|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|BibEntry|Figure|Formula|Artifact)*
}

Span = element pdf2:Span {
  pdf2-attributes,
  (text|NonStruct|Private|Note|Code|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|BibEntry|Figure|Formula|Artifact)*
}

Link = element pdf2:Link {
  pdf2-attributes,
  (
    (DocumentFragment|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|H|Title|Sub|Lbl|Annot|Form|Ruby|Warichu|FENote|L|BibEntry|Table|Caption|Figure|Formula|Artifact|math)* # Grouping
    |
    (text|Art|Div|Sect|NonStruct|Private|Note|Code|Sub|Lbl|Em|Strong|Span|Quote|Reference|Annot|Ruby|Warichu|FENote|BibEntry|Table|Figure|Formula|Artifact|math)* #
  )
}

Annot = element pdf2:Annot {
  pdf2-attributes,
  (
    (DocumentFragment|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|H|Title|Sub|Lbl|Link|Reference|Annot|Form|Ruby|Warichu|FENote|L|BibEntry|Table|Caption|Figure|Formula|Artifact)* # Grouping
    |
    (text|Art|Div|Sect|NonStruct|Private|Note|Code|Sub|Lbl|Em|Strong|Span|Quote|Link|Annot|Ruby|Warichu|FENote|BibEntry|Figure|Formula|Artifact)* #
  )
}

# The repetition in the Grouping alternative applies to the child choice only,
# not to the whole "Caption? & child" group: at most one Caption is allowed,
# exactly as in the Block alternative.
Form = element pdf2:Form {
  pdf2-attributes,
  attribute Layout:Width {text}?,
  attribute Layout:Height {text}?,
  printfield-attributes,
  (
    (Caption? & (Part|Div|NonStruct|Private|Note|Code|Lbl|Reference|FENote|L|BibEntry|Table|Figure|Formula|Artifact)*) # Grouping
    |
    (Caption? & (text|Div|NonStruct|Private|Note|Lbl|FENote|BibEntry|Artifact)*) # Block
  )
}

### Ruby
# [a] == Table 369 ISO 32000-2
# The ruby assembly is one RB followed by either a single RT or the
# three-element group RP, RT, RP (same shape as Warichu's WP, WT, WP).
Ruby = element pdf2:Ruby {
  pdf2-attributes,
  attribute Layout:RubyAlign {"Start" | "Center" | "End" | "Justify" | "Distribute"}?,
  attribute Layout:RubyPosition {"Before" | "After" | "Warichu" | "Inline"}?,
  ( (text|NonStruct|Private)* & (RB,(RT|(RP,RT,RP))) )
}

RB = element pdf2:RB {
  pdf2-attributes,
  attribute Layout:RubyAlign {"Start" | "Center" | "End" | "Justify" | "Distribute"}?,
  attribute Layout:RubyPosition {"Before" | "After" | "Warichu" | "Inline"}?,
  (text|NonStruct|Private|Sub|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Artifact)*
}

RT = element pdf2:RT {
  pdf2-attributes,
  attribute Layout:RubyAlign {"Start" | "Center" | "End" | "Justify" | "Distribute"}?,
  attribute Layout:RubyPosition {text}?,
  (text|NonStruct|Private|Sub|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Artifact)*
}

RP = element pdf2:RP {
  pdf2-attributes,
  attribute Layout:RubyAlign {text}?,
  attribute Layout:RubyPosition {text}?,
  (text|NonStruct|Private|Sub|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Artifact)*
}

# [b] = Table 369 ISO 32000-2
Warichu = element pdf2:Warichu {
  pdf2-attributes,
  ( (text|NonStruct|Private)* & (WP,WT,WP) )
}

WT = element pdf2:WT {
  pdf2-attributes,
  (text|NonStruct|Private|Sub|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Artifact)*
}

WP = element pdf2:WP {
  pdf2-attributes,
  (text|NonStruct|Private|Sub|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Artifact)*
}

### Lists
# Caption here is 0..1 matching 32005
# Table 370 ISO 32000-2 says first or last which would be
# (Caption, (NonStruct|Private|L|LI|Artifact)*) | ((NonStruct|Private|L|LI|Artifact)*, Caption?)
L = element pdf2:L {
  pdf2-attributes,
  list-attributes,
  (Caption? & (NonStruct|Private|L|LI|Artifact)*)
}

LI = element pdf2:LI {
  pdf2-attributes,
  (text|Div|NonStruct|Private|Lbl|LBody|Artifact)*
}

LBody = element pdf2:LBody {
  pdf2-attributes,
  (H? & Caption? & (text|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Sub|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|Index|L|BibEntry|Table|Figure|Formula|Artifact)*)
}

Index = element pdf2:Index {
  pdf2-attributes,
  (Part|Div|Sect|NonStruct|Private|P|Note|Hn|H|Reference|Annot|FENote|L|Table|Caption|Figure|Formula|Artifact)*
}

### Tables
# Caption here is 0..1 matching 32005
# Table 371 ISO 32000-2 says first or last which would be
# (Caption, (...)) | ((...), Caption?)
Table = element pdf2:Table {
  pdf2-attributes,
  attribute Layout:Width {text}?,
  attribute Layout:Height {text}?,
  table-attributes,
  ((NonStruct|Private|TR|Artifact)* & (THead?,TBody*,TFoot?) & Caption?)
}

THead = element pdf2:THead {
  pdf2-attributes,
  table-attributes,
  (NonStruct|Private|TR|Artifact)*
}

TBody = element pdf2:TBody {
  pdf2-attributes,
  table-attributes,
  (NonStruct|Private|TR|Artifact)*
}

# NOTE(review): unlike THead and TBody, TFoot does not accept table-attributes;
# left unchanged - confirm whether this asymmetry is intended.
TFoot = element pdf2:TFoot {
  pdf2-attributes,
  (NonStruct|Private|TR|Artifact)*
}

TR = element pdf2:TR {
  pdf2-attributes,
  table-attributes,
  (NonStruct|Private|TH|TD|Artifact)*
}

# NOTE(review): the lone '#' after the Layout attributes is treated as a
# separator, so table-attributes stays active here as it is in TD - confirm.
TH = element pdf2:TH {
  pdf2-attributes,
  attribute Layout:Width {text}?,
  attribute Layout:Height {text}?,
  attribute Layout:BlockAlign {"Before" | "Middle" | "After" | "Justify"}?,
  attribute Layout:InlineAlign {"Start" | "Center" | "End"}?,
  attribute Layout:TBorderStyle {text}?,
  attribute Layout:TPadding {text}?,
  #
  table-attributes,
  attribute Table:Scope {"Column" | "Row" | "Both"}?,
  attribute Table:Short {text}?,
  (H? & (text|Art|Div|Sect|NonStruct|Private|P|Note|Code|Hn|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|Index|L|BibEntry|Table|Figure|Formula|Artifact)*)
}

TD = element pdf2:TD {
  pdf2-attributes,
  attribute Layout:Width {text}?,
  attribute Layout:Height {text}?,
  attribute Layout:BlockAlign {"Before" | "Middle" | "After" | "Justify"}?,
  attribute Layout:InlineAlign {"Start" | "Center" | "End"}?,
  attribute Layout:TBorderStyle {text}?,
  attribute Layout:TPadding {text}?,
  table-attributes,
  (H? & (text|Art|Div|Sect|NonStruct|Private|P|Note|Code|Hn|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|Index|L|BibEntry|Table|Figure|Formula|Artifact)*)
}

# Captions and Figures
Caption = element pdf2:Caption {
  pdf2-attributes,
  (
    (H? & (DocumentFragment|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Sub|Lbl|Link|Reference|Annot|Form|Ruby|Warichu|FENote|Index|L|BibEntry|Table|Figure|Formula|Artifact)*) # Grouping
    |
    (H? & (text|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|Index|L|BibEntry|Table|Figure|Formula|Artifact)*) #
  )
}

# Kept as a separate named pattern; both attributes are optional.
figure-attributes =
  attribute actualtext {text}? &  # ActualText
  attribute alt {text}?           # Alt

# Figure spells out its no-namespace attributes instead of using pdf2-attributes;
# actualtext and alt come from figure-attributes.
Figure = element pdf2:Figure {
  attribute lang {text}?,      # Lang
  attribute expanded {text}?,  # E
  attribute title {text}?,     # T
  attribute id {text}?,        # ID
  layout-attributes,
  otherns-attributes,
  showtags-attributes,
  attribute Layout:Width {text}?,
  attribute Layout:Height {text}?,
  figure-attributes,
  (
    (H? & Caption? & (Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Sub|Lbl|Link|Reference|Annot|Form|Ruby|Warichu|FENote|Index|L|BibEntry|Table|Figure|Formula|Artifact)*) # Grouping
    |
    (H? & Caption? & (text|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|Index|L|BibEntry|Table|Figure|Formula|Artifact)*) #
  )
}

### Formula
Formula = element pdf2:Formula {
  pdf2-attributes,
  attribute Layout:Width {text}?,
  attribute Layout:Height {text}?,
  (H? & Caption? & (text|Part|Div|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|Index|L|BibEntry|Table|Figure|Formula|Artifact|math)*)
}

### Artifact
# Caption is optional (0..1), as in Aside, LBody, Figure, Formula and
# BlockQuote; a mandatory Caption would reject every caption-less Artifact.
Artifact = element pdf2:Artifact {
  pdf1rolemap-Any,
  pdf2-attributes,
  artifact-attributes,
  (H? & Caption? & (text|Document|DocumentFragment|Part|Art|Div|Sect|TOC|TOCI|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Title|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|RB|RT|RP|Warichu|WT|WP|FENote|Index|L|LI|LBody|BibEntry|Table|TR|TH|TD|THead|TBody|TFoot|Figure|Formula|Artifact)*)
}

### PDF1
Art = element pdf1:Art {
  pdf2-attributes,
  (H? & (DocumentFragment|Part|Div|Sect|TOC|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Title|Lbl|Link|Annot|Form|FENote|Index|L|Table|Caption|Figure|Formula|Artifact)*)
}

Private = element pdf1:Private {
  pdf2-attributes,
  (text|Document|DocumentFragment|Part|Art|Div|Sect|TOC|TOCI|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Hn|H|Title|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|RB|RT|RP|Warichu|WT|WP|FENote|Index|L|LI|LBody|BibEntry|Table|TR|TH|TD|THead|TBody|TFoot|Caption|Figure|Formula|Artifact)*
}

TOC = element pdf1:TOC {
  pdf2-attributes,
  (Caption? & (Part|TOC|TOCI|NonStruct|Private|Artifact)*)
}

TOCI = element pdf1:TOCI {
  pdf2-attributes,
  (Div|TOC|NonStruct|Private|P|Lbl|Reference|Artifact)*
}

Note = element pdf1:Note {
  pdf2-attributes,
  (
    (DocumentFragment|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Sub|Lbl|Em|Strong|Link|Annot|Form|Ruby|Warichu|FENote|Index|L|Table|Caption|Figure|Formula|Artifact)* # Grouping
    |
    (text|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|Index|L|BibEntry|Table|Figure|Formula|Artifact)* # Block
  )
}

Reference = element pdf1:Reference {
  pdf2-attributes,
  (text|NonStruct|Private|Note|Lbl|Em|Strong|Span|Link|Annot|FENote|BibEntry|Figure|Artifact)*
}

Code = element pdf1:Code {
  pdf2-attributes,
  (
    (DocumentFragment|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Sub|Lbl|Em|Strong|Link|Annot|Form|Ruby|Warichu|FENote|Index|L|Table|Caption|Figure|Formula|Artifact)* # Grouping
    |
    (text|Part|Art|Div|Sect|Aside|BlockQuote|NonStruct|Private|P|Note|Code|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|Index|L|BibEntry|Table|Figure|Formula|Artifact)* # Block
  )
}

# Aside + Title
BlockQuote = element pdf1:BlockQuote {
  pdf2-attributes,
  (H? & Caption? & (text|Document|DocumentFragment|Part|Art|Div|Sect|TOC|BlockQuote|NonStruct|Private|P|Note|Code|Hn|Title|Lbl|Link|Reference|Annot|Form|FENote|Index|L|Table|Figure|Formula|Artifact)*)
}

Quote = element pdf1:Quote {
  pdf2-attributes,
  (text|NonStruct|Private|Note|Code|Sub|Lbl|Em|Strong|Span|Quote|Link|Reference|Annot|Form|Ruby|Warichu|FENote|BibEntry|Figure|Formula|Artifact)*
}