View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2000-2020, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(sgml,
   38          [ load_html/3,                % +Input, -DOM, +Options
   39            load_xml/3,                 % +Input, -DOM, +Options
   40            load_sgml/3,                % +Input, -DOM, +Options
   41
   42            load_sgml_file/2,           % +File, -ListOfContent
   43            load_xml_file/2,            % +File, -ListOfContent
   44            load_html_file/2,           % +File, -Document
   45
   46            load_structure/3,           % +File, -Term, +Options
   47
   48            load_dtd/2,                 % +DTD, +File
   49            load_dtd/3,                 % +DTD, +File, +Options
   50            dtd/2,                      % +Type, -DTD
   51            dtd_property/2,             % +DTD, ?Property
   52
   53            new_dtd/2,                  % +Doctype, -DTD
   54            free_dtd/1,                 % +DTD
   55            open_dtd/3,                 % +DTD, +Options, -Stream
   56
   57            new_sgml_parser/2,          % -Parser, +Options
   58            free_sgml_parser/1,         % +Parser
   59            set_sgml_parser/2,          % +Parser, +Options
   60            get_sgml_parser/2,          % +Parser, +Options
   61            sgml_parse/2,               % +Parser, +Options
   62
   63            sgml_register_catalog_file/2, % +File, +StartOrEnd
   64
   65            xml_quote_attribute/3,      % +In, -Quoted, +Encoding
   66            xml_quote_cdata/3,          % +In, -Quoted, +Encoding
   67            xml_quote_attribute/2,      % +In, -Quoted
   68            xml_quote_cdata/2,          % +In, -Quoted
   69            xml_name/1,                 % +In
   70            xml_name/2,                 % +In, +Encoding
   71
   72            xsd_number_string/2,        % ?Number, ?String
   73            xsd_time_string/3,          % ?Term, ?Type, ?String
   74
   75            xml_basechar/1,             % +Code
   76            xml_ideographic/1,          % +Code
   77            xml_combining_char/1,       % +Code
   78            xml_digit/1,                % +Code
   79            xml_extender/1,             % +Code
   80
   81            iri_xml_namespace/2,        % +IRI, -Namespace
   82            iri_xml_namespace/3,        % +IRI, -Namespace, -LocalName
   83            xml_is_dom/1                % +Term
   84          ]).   85:- autoload(library(error),[instantiation_error/1]).   86:- autoload(library(iostream),[open_any/5,close_any/1]).   87:- autoload(library(lists),[member/2,selectchk/3]).   88:- autoload(library(option),[select_option/3,merge_options/3]).   89
   90:- meta_predicate
   91    load_structure(+, -, :),
   92    load_html(+, -, :),
   93    load_xml(+, -, :),
   94    load_sgml(+, -, :).   95
   96:- predicate_options(load_structure/3, 3,
   97                     [ charpos(integer),
   98                       cdata(oneof([atom,string])),
   99                       defaults(boolean),
  100                       dialect(oneof([html,html4,html5,sgml,xhtml,xhtml5,xml,xmlns])),
  101                       doctype(atom),
  102                       dtd(any),
  103                       encoding(oneof(['iso-8859-1', 'utf-8', 'us-ascii'])),
  104                       entity(atom,atom),
  105                       keep_prefix(boolean),
  106                       file(atom),
  107                       line(integer),
  108                       offset(integer),
  109                       number(oneof([token,integer])),
  110                       qualify_attributes(boolean),
  111                       shorttag(boolean),
  112                       case_sensitive_attributes(boolean),
  113                       case_preserving_attributes(boolean),
  114                       system_entities(boolean),
  115                       max_memory(integer),
  116                       ignore_doctype(boolean),
  117                       space(oneof([sgml,preserve,default,remove,strict])),
  118                       xmlns(atom),
  119                       xmlns(atom,atom),
  120                       pass_to(sgml_parse/2, 2)
  121                     ]).  122:- predicate_options(load_html/3, 3,
  123                     [ pass_to(load_structure/3, 3)
  124                     ]).  125:- predicate_options(load_xml/3, 3,
  126                     [ pass_to(load_structure/3, 3)
  127                     ]).  128:- predicate_options(load_sgml/3, 3,
  129                     [ pass_to(load_structure/3, 3)
  130                     ]).  131:- predicate_options(load_dtd/3, 3,
  132                     [ dialect(oneof([sgml,xml,xmlns])),
  133                       pass_to(open/4, 4)
  134                     ]).  135:- predicate_options(sgml_parse/2, 2,
  136                     [ call(oneof([begin,end,cdata,pi,decl,error,xmlns,urlns]),
  137                            callable),
  138                       cdata(oneof([atom,string])),
  139                       content_length(integer),
  140                       document(-any),
  141                       max_errors(integer),
  142                       parse(oneof([file,element,content,declaration,input])),
  143                       source(any),
  144                       syntax_errors(oneof([quiet,print,style])),
  145                       xml_no_ns(oneof([error,quiet]))
  146                     ]).  147:- predicate_options(new_sgml_parser/2, 2,
  148                     [ dtd(any)
  149                     ]).  150
  151
  152/** <module> SGML, XML and HTML parser
  153
  154This library allows you to parse SGML, XML   and HTML data into a Prolog
  155data structure. The library defines several families of predicates:
  156
  157  $ High-level predicates :
  158  Most users will only use load_html/3, load_xml/3 or load_sgml/3 to
  159  parse arbitrary input into a _DOM_ structure.  These predicates all
  160  call load_structure/3, which provides more options and may be
  161  used for processing non-standard documents.
  162
  163  The DOM structure can be used by library(xpath) to extract information
  164  from the document.
  165
  166  $ The low-level parser :
  167  The actual parser is written in C and consists of two parts: one for
  168  processing DTD (Document Type Definitions) and one for parsing data.
  169  The data can either be parsed to a Prolog (_DOM_) term or the parser
  170  can perform callbacks for the DOM _events_.
  171
  172  $ Utility predicates :
  173  Finally, this library provides prmitives for classifying characters
  174  and strings according to the XML specification such as xml_name/1 to
  175  verify whether an atom is a valid XML name (identifier).  It also
  176  provides primitives to quote attributes and CDATA elements.
  177*/
  178
  179:- multifile user:file_search_path/2.  180:- dynamic   user:file_search_path/2.  181
  182user:file_search_path(dtd, '.').
  183user:file_search_path(dtd, swi('library/DTD')).
  184
  185sgml_register_catalog_file(File, Location) :-
  186    prolog_to_os_filename(File, OsFile),
  187    '_sgml_register_catalog_file'(OsFile, Location).
  188
  189:- use_foreign_library(foreign(sgml2pl)).  190
  191register_catalog(Base) :-
  192    absolute_file_name(dtd(Base),
  193                           [ extensions([soc]),
  194                             access(read),
  195                             file_errors(fail)
  196                           ],
  197                           SocFile),
  198    sgml_register_catalog_file(SocFile, end).
  199
  200:- initialization
  201    ignore(register_catalog('HTML4')).  202
  203
  204                 /*******************************
  205                 *         DTD HANDLING         *
  206                 *******************************/
  207
  208/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  209Note that concurrent access to DTD objects  is not allowed, and hence we
  210will allocate and destroy them in each   thread.  Possibibly it would be
  211nicer to find out why  concurrent  access   to  DTD's  is  flawed. It is
  212diagnosed to mess with the entity resolution by Fabien Todescato.
  213- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  214
  215:- thread_local
  216    current_dtd/2.  217:- volatile
  218    current_dtd/2.  219:- thread_local
  220    registered_cleanup/0.  221:- volatile
  222    registered_cleanup/0.  223
  224:- multifile
  225    dtd_alias/2.  226
  227:- create_prolog_flag(html_dialect, html5, [type(atom)]).  228
  229dtd_alias(html4, 'HTML4').
  230dtd_alias(html5, 'HTML5').
  231dtd_alias(html,  DTD) :-
  232    current_prolog_flag(html_dialect, Dialect),
  233    dtd_alias(Dialect, DTD).
  234
  235%!  dtd(+Type, -DTD) is det.
  236%
  237%   DTD is a DTD object created from  the file dtd(Type). Loaded DTD
  238%   objects are cached. Note that  DTD   objects  may  not be shared
  239%   between threads. Therefore, dtd/2  maintains   the  pool  of DTD
  240%   objects  using  a  thread_local  predicate.    DTD  objects  are
  241%   destroyed if a thread terminates.
  242%
  243%   @error existence_error(source_sink, dtd(Type))
  244
  245dtd(Type, DTD) :-
  246    current_dtd(Type, DTD),
  247    !.
  248dtd(Type, DTD) :-
  249    new_dtd(Type, DTD),
  250    (   dtd_alias(Type, Base)
  251    ->  true
  252    ;   Base = Type
  253    ),
  254    absolute_file_name(dtd(Base),
  255                       [ extensions([dtd]),
  256                         access(read)
  257                       ], DtdFile),
  258    load_dtd(DTD, DtdFile),
  259    register_cleanup,
  260    asserta(current_dtd(Type, DTD)).
  261
  262%!  load_dtd(+DTD, +DtdFile, +Options)
  263%
  264%   Load DtdFile into a DTD.  Defined options are:
  265%
  266%           * dialect(+Dialect)
  267%           Dialect to use (xml, xmlns, sgml)
  268%
  269%           * encoding(+Encoding)
  270%           Encoding of DTD file
  271%
  272%   @param  DTD is a fresh DTD object, normally created using
  273%           new_dtd/1.
  274
  275load_dtd(DTD, DtdFile) :-
  276    load_dtd(DTD, DtdFile, []).
  277load_dtd(DTD, DtdFile, Options) :-
  278    sgml_open_options(sgml:Options, OpenOptions, sgml:DTDOptions),
  279    setup_call_cleanup(
  280        open_dtd(DTD, DTDOptions, DtdOut),
  281        setup_call_cleanup(
  282            open(DtdFile, read, DtdIn, OpenOptions),
  283            copy_stream_data(DtdIn, DtdOut),
  284            close(DtdIn)),
  285        close(DtdOut)).
  286
  287%!  destroy_dtds
  288%
  289%   Destroy  DTDs  cached  by  this  thread   as  they  will  become
  290%   unreachable anyway.
  291
  292destroy_dtds :-
  293    (   current_dtd(_Type, DTD),
  294        free_dtd(DTD),
  295        fail
  296    ;   true
  297    ).
  298
  299%!  register_cleanup
  300%
  301%   Register cleanup of DTDs created for this thread.
  302
  303register_cleanup :-
  304    registered_cleanup,
  305    !.
  306register_cleanup :-
  307    (   thread_self(main)
  308    ->  at_halt(destroy_dtds)
  309    ;   current_prolog_flag(threads, true)
  310    ->  prolog_listen(this_thread_exit, destroy_dtds)
  311    ;   true
  312    ),
  313    assert(registered_cleanup).
  314
  315
  316                 /*******************************
  317                 *          EXAMINE DTD         *
  318                 *******************************/
  319
  320prop(doctype(_), _).
  321prop(elements(_), _).
  322prop(entities(_), _).
  323prop(notations(_), _).
  324prop(entity(E, _), DTD) :-
  325    (   nonvar(E)
  326    ->  true
  327    ;   '$dtd_property'(DTD, entities(EL)),
  328        member(E, EL)
  329    ).
  330prop(element(E, _, _), DTD) :-
  331    (   nonvar(E)
  332    ->  true
  333    ;   '$dtd_property'(DTD, elements(EL)),
  334        member(E, EL)
  335    ).
  336prop(attributes(E, _), DTD) :-
  337    (   nonvar(E)
  338    ->  true
  339    ;   '$dtd_property'(DTD, elements(EL)),
  340        member(E, EL)
  341    ).
  342prop(attribute(E, A, _, _), DTD) :-
  343    (   nonvar(E)
  344    ->  true
  345    ;   '$dtd_property'(DTD, elements(EL)),
  346        member(E, EL)
  347    ),
  348    (   nonvar(A)
  349    ->  true
  350    ;   '$dtd_property'(DTD, attributes(E, AL)),
  351        member(A, AL)
  352    ).
  353prop(notation(N, _), DTD) :-
  354    (   nonvar(N)
  355    ->  true
  356    ;   '$dtd_property'(DTD, notations(NL)),
  357        member(N, NL)
  358    ).
  359
  360dtd_property(DTD, Prop) :-
  361    prop(Prop, DTD),
  362    '$dtd_property'(DTD, Prop).
  363
  364
  365                 /*******************************
  366                 *             SGML             *
  367                 *******************************/
  368
  369%!  load_structure(+Source, -ListOfContent, :Options) is det.
  370%
  371%   Parse   Source   and   return   the   resulting   structure   in
  372%   ListOfContent. Source is handed to  open_any/5, which allows for
  373%   processing an extensible set of input sources.
  374%
  375%   A proper XML document contains only   a  single toplevel element
  376%   whose name matches the document type.   Nevertheless,  a list is
  377%   returned for consistency with  the   representation  of  element
  378%   content.
  379%
  380%   The  encoding(+Encoding)  option   is    treated   special   for
  381%   compatibility reasons:
  382%
  383%     - If `Encoding` is one of =iso-8859-1=, =us-ascii= or =utf-8=,
  384%       the stream is opened in binary mode and the option is passed
  385%       to the SGML parser.
  386%     - If `Encoding` is present, but not one of the above, the
  387%       stream is opened in text mode using the given encoding.
  388%     - Otherwise (no `Encoding`), the stream is opened in binary
  389%       mode and doing the correct decoding is left to the parser.
  390
  391load_structure(Spec, DOM, Options) :-
  392    sgml_open_options(Options, OpenOptions, SGMLOptions),
  393    setup_call_cleanup(
  394        open_any(Spec, read, In, Close, OpenOptions),
  395        load_structure_from_stream(In, DOM, SGMLOptions),
  396        close_any(Close)).
  397
  398sgml_open_options(Options, OpenOptions, SGMLOptions) :-
  399    Options = M:Plain,
  400    (   select_option(encoding(Encoding), Plain, NoEnc)
  401    ->  (   sgml_encoding(Encoding)
  402        ->  merge_options(NoEnc, [type(binary)], OpenOptions),
  403            SGMLOptions = Options
  404        ;   OpenOptions = Plain,
  405            SGMLOptions = M:NoEnc
  406        )
  407    ;   merge_options(Plain, [type(binary)], OpenOptions),
  408        SGMLOptions = Options
  409    ).
  410
  411sgml_encoding(Enc) :-
  412    downcase_atom(Enc, Enc1),
  413    sgml_encoding_l(Enc1).
  414
  415sgml_encoding_l('iso-8859-1').
  416sgml_encoding_l('us-ascii').
  417sgml_encoding_l('utf-8').
  418sgml_encoding_l('utf8').
  419sgml_encoding_l('iso_latin_1').
  420sgml_encoding_l('ascii').
  421
  422load_structure_from_stream(In, Term, M:Options) :-
  423    (   select_option(dtd(DTD), Options, Options1)
  424    ->  ExplicitDTD = true
  425    ;   ExplicitDTD = false,
  426        Options1 = Options
  427    ),
  428    move_front(Options1, dialect(_), Options2), % dialect sets defaults
  429    setup_call_cleanup(
  430        new_sgml_parser(Parser,
  431                        [ dtd(DTD)
  432                        ]),
  433        parse(Parser, M:Options2, TermRead, In),
  434        free_sgml_parser(Parser)),
  435    (   ExplicitDTD == true
  436    ->  (   DTD = dtd(_, DocType),
  437            dtd_property(DTD, doctype(DocType))
  438        ->  true
  439        ;   true
  440        )
  441    ;   free_dtd(DTD)
  442    ),
  443    Term = TermRead.
  444
  445move_front(Options0, Opt, Options) :-
  446    selectchk(Opt, Options0, Options1),
  447    !,
  448    Options = [Opt|Options1].
  449move_front(Options, _, Options).
  450
  451
  452parse(Parser, M:Options, Document, In) :-
  453    set_parser_options(Options, Parser, In, Options1),
  454    parser_meta_options(Options1, M, Options2),
  455    set_input_location(Parser, In),
  456    sgml_parse(Parser,
  457               [ document(Document),
  458                 source(In)
  459               | Options2
  460               ]).
  461
  462set_parser_options([], _, _, []).
  463set_parser_options([H|T], Parser, In, Rest) :-
  464    (   set_parser_option(H, Parser, In)
  465    ->  set_parser_options(T, Parser, In, Rest)
  466    ;   Rest = [H|R2],
  467        set_parser_options(T, Parser, In, R2)
  468    ).
  469
  470set_parser_option(Var, _Parser, _In) :-
  471    var(Var),
  472    !,
  473    instantiation_error(Var).
  474set_parser_option(Option, Parser, _) :-
  475    def_entity(Option, Parser),
  476    !.
  477set_parser_option(offset(Offset), _Parser, In) :-
  478    !,
  479    seek(In, Offset, bof, _).
  480set_parser_option(Option, Parser, _In) :-
  481    parser_option(Option),
  482    !,
  483    set_sgml_parser(Parser, Option).
  484set_parser_option(Name=Value, Parser, In) :-
  485    Option =.. [Name,Value],
  486    set_parser_option(Option, Parser, In).
  487
  488
  489parser_option(dialect(_)).
  490parser_option(shorttag(_)).
  491parser_option(case_sensitive_attributes(_)).
  492parser_option(case_preserving_attributes(_)).
  493parser_option(system_entities(_)).
  494parser_option(max_memory(_)).
  495parser_option(ignore_doctype(_)).
  496parser_option(file(_)).
  497parser_option(line(_)).
  498parser_option(space(_)).
  499parser_option(number(_)).
  500parser_option(defaults(_)).
  501parser_option(doctype(_)).
  502parser_option(qualify_attributes(_)).
  503parser_option(encoding(_)).
  504parser_option(keep_prefix(_)).
  505
  506
  507def_entity(entity(Name, Value), Parser) :-
  508    get_sgml_parser(Parser, dtd(DTD)),
  509    xml_quote_attribute(Value, QValue),
  510    setup_call_cleanup(open_dtd(DTD, [], Stream),
  511                       format(Stream, '<!ENTITY ~w "~w">~n',
  512                              [Name, QValue]),
  513                       close(Stream)).
  514def_entity(xmlns(URI), Parser) :-
  515    set_sgml_parser(Parser, xmlns(URI)).
  516def_entity(xmlns(NS, URI), Parser) :-
  517    set_sgml_parser(Parser, xmlns(NS, URI)).
  518
  519%!  parser_meta_options(+Options0, +Module, -Options)
  520%
  521%   Qualify meta-calling options to the parser.
  522
  523parser_meta_options([], _, []).
  524parser_meta_options([call(When, Closure)|T0], M, [call(When, M:Closure)|T]) :-
  525    !,
  526    parser_meta_options(T0, M, T).
  527parser_meta_options([H|T0], M, [H|T]) :-
  528    parser_meta_options(T0, M, T).
  529
  530
  531%!  set_input_location(+Parser, +In:stream) is det.
  532%
  533%   Set the input location if this was not set explicitly
  534
  535set_input_location(Parser, _In) :-
  536    get_sgml_parser(Parser, file(_)),
  537    !.
  538set_input_location(Parser, In) :-
  539    stream_property(In, file_name(File)),
  540    !,
  541    set_sgml_parser(Parser, file(File)),
  542    stream_property(In, position(Pos)),
  543    set_sgml_parser(Parser, position(Pos)).
  544set_input_location(_, _).
  545
  546                 /*******************************
  547                 *           UTILITIES          *
  548                 *******************************/
  549
  550%!  load_sgml_file(+File, -DOM) is det.
  551%
  552%   Load SGML from File and unify   the resulting DOM structure with
  553%   DOM.
  554%
  555%   @deprecated     New code should use load_sgml/3.
  556
  557load_sgml_file(File, Term) :-
  558    load_sgml(File, Term, []).
  559
  560%!  load_xml_file(+File, -DOM) is det.
  561%
  562%   Load XML from File and unify   the  resulting DOM structure with
  563%   DOM.
  564%
  565%   @deprecated     New code should use load_xml/3.
  566
  567load_xml_file(File, Term) :-
  568    load_xml(File, Term, []).
  569
  570%!  load_html_file(+File, -DOM) is det.
  571%
  572%   Load HTML from File and unify   the resulting DOM structure with
  573%   DOM.
  574%
  575%   @deprecated     New code should use load_html/3.
  576
  577load_html_file(File, DOM) :-
  578    load_html(File, DOM, []).
  579
  580%!  load_html(+Input, -DOM, +Options) is det.
  581%
  582%   Load HTML text from Input and  unify the resulting DOM structure
  583%   with DOM. Options are passed   to load_structure/3, after adding
  584%   the following default options:
  585%
  586%     - dtd(DTD)
  587%     Pass the DTD for HTML as obtained using dtd(html, DTD).
  588%     - dialect(Dialect)
  589%     Current dialect from the Prolog flag =html_dialect=
  590%     - max_errors(-1)
  591%     - syntax_errors(quiet)
  592%     Most HTML encountered in the wild contains errors. Even in the
  593%     context of errors, the resulting DOM term is often a
  594%     reasonable guess at the intent of the author.
  595%
  596%   You may also want to use  the library(http/http_open) to support
  597%   loading from HTTP and HTTPS URLs. For example:
  598%
  599%   ==
  600%   :- use_module(library(http/http_open)).
  601%   :- use_module(library(sgml)).
  602%
  603%   load_html_url(URL, DOM) :-
  604%       load_html(URL, DOM, []).
  605%   ==
  606
  607load_html(File, Term, M:Options) :-
  608    current_prolog_flag(html_dialect, Dialect),
  609    dtd(Dialect, DTD),
  610    merge_options(Options,
  611                  [ dtd(DTD),
  612                    dialect(Dialect),
  613                    max_errors(-1),
  614                    syntax_errors(quiet)
  615                  ], Options1),
  616    load_structure(File, Term, M:Options1).
  617
  618%!  load_xml(+Input, -DOM, +Options) is det.
  619%
  620%   Load XML text from Input and   unify the resulting DOM structure
  621%   with DOM. Options are passed   to load_structure/3, after adding
  622%   the following default options:
  623%
  624%     - dialect(xml)
  625
  626load_xml(Input, DOM, M:Options) :-
  627    merge_options(Options,
  628                  [ dialect(xml)
  629                  ], Options1),
  630    load_structure(Input, DOM, M:Options1).
  631
  632%!  load_sgml(+Input, -DOM, +Options) is det.
  633%
  634%   Load SGML text from Input and  unify the resulting DOM structure
  635%   with DOM. Options are passed   to load_structure/3, after adding
  636%   the following default options:
  637%
  638%     - dialect(sgml)
  639
  640load_sgml(Input, DOM, M:Options) :-
  641    merge_options(Options,
  642                  [ dialect(sgml)
  643                  ], Options1),
  644    load_structure(Input, DOM, M:Options1).
  645
  646
  647
  648                 /*******************************
  649                 *            ENCODING          *
  650                 *******************************/
  651
  652%!  xml_quote_attribute(+In, -Quoted) is det.
  653%!  xml_quote_cdata(+In, -Quoted) is det.
  654%
  655%   Backward  compatibility  for  versions  that  allow  to  specify
  656%   encoding. All characters that cannot fit the encoding are mapped
  657%   to XML character entities (&#dd;).  Using   ASCII  is the safest
  658%   value.
  659
  660xml_quote_attribute(In, Quoted) :-
  661    xml_quote_attribute(In, Quoted, ascii).
  662
  663xml_quote_cdata(In, Quoted) :-
  664    xml_quote_cdata(In, Quoted, ascii).
  665
  666%!  xml_name(+Atom) is semidet.
  667%
  668%   True if Atom is a valid XML name.
  669
  670xml_name(In) :-
  671    xml_name(In, ascii).
  672
  673
  674                 /*******************************
  675                 *    XML CHARACTER CLASSES     *
  676                 *******************************/
  677
  678%!  xml_basechar(+CodeOrChar) is semidet.
  679%!  xml_ideographic(+CodeOrChar) is semidet.
  680%!  xml_combining_char(+CodeOrChar) is semidet.
  681%!  xml_digit(+CodeOrChar) is semidet.
  682%!  xml_extender(+CodeOrChar) is semidet.
  683%
  684%   XML  character  classification   predicates.    Each   of  these
  685%   predicates accept both a character   (one-character  atom) and a
  686%   code (integer).
  687%
  688%   @see http://www.w3.org/TR/2006/REC-xml-20060816
  689
  690
  691                 /*******************************
  692                 *         TYPE CHECKING        *
  693                 *******************************/
  694
  695%!  xml_is_dom(@Term) is semidet.
  696%
  697%   True  if  term  statisfies   the    structure   as  returned  by
  698%   load_structure/3 and friends.
  699
  700xml_is_dom(0) :- !, fail.               % catch variables
  701xml_is_dom(List) :-
  702    is_list(List),
  703    !,
  704    xml_is_content_list(List).
  705xml_is_dom(Term) :-
  706    xml_is_element(Term).
  707
  708xml_is_content_list([]).
  709xml_is_content_list([H|T]) :-
  710    xml_is_content(H),
  711    xml_is_content_list(T).
  712
  713xml_is_content(0) :- !, fail.
  714xml_is_content(pi(Pi)) :-
  715    !,
  716    atom(Pi).
  717xml_is_content(CDATA) :-
  718    atom(CDATA),
  719    !.
  720xml_is_content(CDATA) :-
  721    string(CDATA),
  722    !.
  723xml_is_content(Term) :-
  724    xml_is_element(Term).
  725
  726xml_is_element(element(Name, Attributes, Content)) :-
  727    dom_name(Name),
  728    dom_attributes(Attributes),
  729    xml_is_content_list(Content).
  730
  731dom_name(NS:Local) :-
  732    atom(NS),
  733    atom(Local),
  734    !.
  735dom_name(Local) :-
  736    atom(Local).
  737
  738dom_attributes(0) :- !, fail.
  739dom_attributes([]).
  740dom_attributes([H|T]) :-
  741    dom_attribute(H),
  742    dom_attributes(T).
  743
  744dom_attribute(Name=Value) :-
  745    dom_name(Name),
  746    atomic(Value).
  747
  748
  749                 /*******************************
  750                 *            MESSAGES          *
  751                 *******************************/
  752:- multifile
  753    prolog:message/3.  754
  755%       Catch messages.  sgml/4 is generated by the SGML2PL binding.
  756
  757prolog:message(sgml(Parser, File, Line, Message)) -->
  758    { get_sgml_parser(Parser, dialect(Dialect))
  759    },
  760    [ 'SGML2PL(~w): ~w:~w: ~w'-[Dialect, File, Line, Message] ].
  761
  762
  763                 /*******************************
  764                 *         XREF SUPPORT         *
  765                 *******************************/
  766
  767:- multifile
  768    prolog:called_by/2.  769
  770prolog:called_by(sgml_parse(_, Options), Called) :-
  771    findall(Meta, meta_call_term(_, Meta, Options), Called).
  772
  773meta_call_term(T, G+N, Options) :-
  774    T = call(Event, G),
  775    pmember(T, Options),
  776    call_params(Event, Term),
  777    functor(Term, _, N).
  778
  779pmember(X, List) :-                     % member for partial lists
  780    nonvar(List),
  781    List = [H|T],
  782    (   X = H
  783    ;   pmember(X, T)
  784    ).
  785
  786call_params(begin, begin(tag,attributes,parser)).
  787call_params(end,   end(tag,parser)).
  788call_params(cdata, cdata(cdata,parser)).
  789call_params(pi,    pi(cdata,parser)).
  790call_params(decl,  decl(cdata,parser)).
  791call_params(error, error(severity,message,parser)).
  792call_params(xmlns, xmlns(namespace,url,parser)).
  793call_params(urlns, urlns(url,url,parser)).
  794
  795                 /*******************************
  796                 *           SANDBOX            *
  797                 *******************************/
  798
  799:- multifile
  800    sandbox:safe_primitive/1,
  801    sandbox:safe_meta_predicate/1.  802
  803sandbox:safe_meta_predicate(sgml:load_structure/3).
  804sandbox:safe_primitive(sgml:dtd(Dialect, _)) :-
  805    dtd_alias(Dialect, _).
  806sandbox:safe_primitive(sgml:xml_quote_attribute(_,_,_)).
  807sandbox:safe_primitive(sgml:xml_quote_cdata(_,_,_)).
  808sandbox:safe_primitive(sgml:xml_name(_,_)).
  809sandbox:safe_primitive(sgml:xml_basechar(_)).
  810sandbox:safe_primitive(sgml:xml_ideographic(_)).
  811sandbox:safe_primitive(sgml:xml_combining_char(_)).
  812sandbox:safe_primitive(sgml:xml_digit(_)).
  813sandbox:safe_primitive(sgml:xml_extender(_)).
  814sandbox:safe_primitive(sgml:iri_xml_namespace(_,_,_)).
  815sandbox:safe_primitive(sgml:xsd_number_string(_,_)).
  816sandbox:safe_primitive(sgml:xsd_time_string(_,_,_))