% \iffalse meta-comment
%
%% File: tagpdf-mc-shared.dtx
%
% Copyright (C) 2019-2025 Ulrike Fischer
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version. The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "tagpdf bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/tagpdf
%
% for those people who are interested.
%<*driver>
\DocumentMetadata{}
\documentclass{l3doc}
\usepackage{array,booktabs,caption}
\hypersetup{pdfauthor=Ulrike Fischer,
 pdftitle=tagpdf-mc module (tagpdf)}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
% \title{^^A
%   The \pkg{tagpdf-mc-shared} module\\ Code related to Marked Content (mc-chunks), code shared by all modes ^^A
%   \\ Part of the tagpdf package
% }
%
% \author{^^A
%   Ulrike Fischer\thanks
%     {^^A
%       E-mail:
%       \href{mailto:fischer@troubleshooting-tex.de}
%         {fischer@troubleshooting-tex.de}^^A
%     }^^A
% }
%
% \date{Version 0.99l, released 2025-01-12}
% \maketitle
% \begin{documentation}
% \section{Public Commands}
% \begin{function}{\tag_mc_begin:n,\tag_mc_end:}
% \begin{syntax}
% \cs{tag_mc_begin:n} \Arg{key-values}\\
% \cs{tag_mc_end:}
% \end{syntax}
% These commands insert the begin and end code of the marked content.
% They don't end a group and in generic mode it doesn't matter
% if they are in another group than the starting commands.
% In generic mode both commands check if they are correctly nested
% and issue a warning if not.
% \end{function}
%
% \begin{function}{\tag_mc_use:n}
% \begin{syntax}
% \cs{tag_mc_use:n} \Arg{label}
% \end{syntax}
% This command allows recording a marked content that was stashed away before
% into the current structure. A marked content can be used only once --
% the command will issue a warning if an mc is used a second time.
% \end{function}
%
% \begin{function}[added = 2019-11-20]
%  {
%    \tag_mc_artifact_group_begin:n, \tag_mc_artifact_group_end:
%  }
% \begin{syntax}
% \cs{tag_mc_artifact_group_begin:n} \Arg{name}\\
% \cs{tag_mc_artifact_group_end:}
% \end{syntax}
% This command pair creates a group with an artifact marker at the begin
% and the end. Inside the group the tagging commands are disabled.
% It allows to mark a complete region as artifact without having to worry
% about user commands with tagging commands.
% \meta{name} should be a value allowed also for the |artifact| key.
% It pushes and pops mc-chunks at the begin and end.
% TODO: document is in tagpdf.tex
% \end{function}
%
% \begin{function}[added = 2021-04-22]
%  {
%    \tag_mc_end_push:, \tag_mc_begin_pop:n
%  }
% \begin{syntax}
% \cs{tag_mc_end_push:} \\
% \cs{tag_mc_begin_pop:n} \Arg{key-values}
% \end{syntax}
% If there is an open mc chunk,
% \cs{tag_mc_end_push:} ends it and pushes its tag on the (global) stack.
% If there is no open chunk, it puts $-1$ on the stack (for debugging).
% \cs{tag_mc_begin_pop:n} removes a value from the stack. If it is different from
% $-1$ it opens a tag with it.
% The reopened mc chunk loses info like the alt text for now.
% \end{function}
%
% \begin{function}[pTF, EXP]{\tag_mc_if_in:}
% \begin{syntax}
% \cs{tag_mc_if_in:TF} \Arg{true code} \Arg{false code}
% \end{syntax}
% Determines if a mc-chunk is open.
% \end{function}
%
% \begin{function}[ EXP,added=2023-06-11]{\tag_mc_reset_box:N}
% \begin{syntax}
% \cs{tag_mc_reset_box:N} \meta{box}
% \end{syntax}
% This resets in lua mode the mc attributes to the one currently in use.
% It does nothing in generic mode.
% \end{function}
%
% \begin{function}[added=2024-11-18]{\tag_mc_add_missing_to_stream:Nn}
% \begin{syntax}
% \cs{tag_mc_add_missing_to_stream:Nn} \meta{box} \Arg{stream name}
% \end{syntax}
% This command is only needed in generic mode, in lua mode it gobbles its arguments.
% In generic mode it adds MC literals to the stream that are missing because of
% page breaks.
% The first argument is the box with the stream, the second a
% string representing the stream. Predeclared are the names
% \texttt{main}, \texttt{footnote} and \texttt{multicol}.
% If more streams should be handled the underlying interface must be enabled
% with \cs{tag_mc_new_stream:n}.
% The command is only for packages doing deep manipulations
% of the output routine!
% Examples of use are in the multicol package and in tagpdf itself.
% \end{function}
%
% \begin{function}[added=2024-11-18]{\tag_mc_new_stream:n}
% \begin{syntax}
% \cs{tag_mc_new_stream:n} \Arg{stream name}
% \end{syntax}
% This declares the interface needed to handle
% a new stream with \cs{tag_mc_add_missing_to_stream:Nn}.
% Predeclared are the names \texttt{main}, \texttt{footnote} and \texttt{multicol}.
% \end{function}
%
% \section{Public keys}
% The following keys can be used with |\tag_mc_begin:n|, |\tagmcbegin|
% and |\tag_mc_begin_pop:n|:
%
% \begin{function}{tag (mc-key)}
% This key is required, unless artifact is used.
% The value is a tag like |P| or |H1| without a slash at the begin,
% this is added by the code.
% It is possible to set up new tags.
% The value of the key is expanded, so it can be a command.
% The expansion is passed unchanged to the PDF,
% so it should with a starting slash give a valid PDF name
% (some ascii with numbers like \texttt{H4} is fine).
% \end{function}
%
% \begin{function}{artifact (mc-key)}
% This will set up the marked content as an artifact. The key should be used
% for content that should be ignored.
% The key can take one of the values |pagination|,
% |pagination/header|, |pagination/footer|,
% |layout|, |page|, |background| and |notype|
% (this is the default).
% \end{function}
%
% \begin{function}{raw (mc-key)}
% This key allows to add more entries to the properties dictionary.
% The value must be correct, low-level PDF. E.g.
% \verb+raw=/Alt (Hello)+ will insert an alternative Text.
% \end{function}
%
% \begin{function}{alt (mc-key)}
% This key inserts an \texttt{/Alt} value in the property dictionary of the BDC operator.
% The value is handled as verbatim string, commands are not expanded.
% The value will be expanded first once. If it is empty, nothing will happen.
% \end{function}
%
% \begin{function}{actualtext (mc-key)}
% This key inserts an \texttt{/ActualText} value in the property dictionary
% of the BDC operator. The value is handled as verbatim string,
% commands are not expanded.
% The value will be expanded first once. If it is empty, nothing will happen.
% \end{function}
%
% \begin{function}{label (mc-key)}
% This key sets a label by which one can call the marked content
% later in another structure
% (if it has been stashed with the |stash| key).
% Internally the label name will start with \texttt{tagpdf-}.
% \end{function}
%
% \begin{function}{stash (mc-key)}
% This \enquote{stashes} an mc-chunk: it is not inserted into the current structure.
% It should normally be used along with a label to be able to use the mc-chunk
% in another place.
% \end{function}
% \end{documentation}
% \begin{implementation}
% The code is split into three parts: code shared by all engines,
% code specific to luamode and code not used by luamode.
% \section{Marked content code -- shared}
%    \begin{macrocode}
%<@@=tag>
%<*header>
\ProvidesExplPackage {tagpdf-mc-code-shared} {2025-01-12} {0.99l}
  {part of tagpdf - code related to marking chunks - code shared by generic and luamode }
%</header>
%    \end{macrocode}
%
% \subsection{Variables and counters}
% MC chunks must be counted.
% I use a latex counter for the absolute count, so that it is added to
% |\cl@@ckpt| and restored e.g. in tabulars and align.
% |\int_new:N \c@g_@@_MCID_abs_int| and
% |\tl_put_right:Nn\cl@@ckpt{\@elt{g_@@_MCID_abs_int}}|
% would work too, but as the name is not expl3 then too, why bother?
% The absolute counter can be used to label and to check if the page
% counter needs a reset.
%
% \begin{variable}{g_@@_MCID_abs_int}
%    \begin{macrocode}
%<*base>
\newcounter { g_@@_MCID_abs_int }
%    \end{macrocode}
% \end{variable}
% \begin{macro}{\@@_get_data_mc_counter:}
% This command allows \cs{tag_get:n} to get the current
% state of the mc counter with the keyword |mc_counter|.
% By comparing the numbers it can be used to check the number of
% structure commands in a piece of code.
%    \begin{macrocode}
\cs_new:Npn \@@_get_data_mc_counter:
  {
    \int_use:N \c@g_@@_MCID_abs_int
  }
%</base>
%    \end{macrocode}
% \end{macro}
%
%
% \begin{macro}{\@@_get_mc_abs_cnt:}
% A (expandable) function to get the current value of the cnt.
% TODO: duplicate of the previous one, this should be cleaned up.
%    \begin{macrocode}
%<*shared>
\cs_new:Npn \@@_get_mc_abs_cnt: { \int_use:N \c@g_@@_MCID_abs_int }
%    \end{macrocode}
% \end{macro}
%
%
% \begin{variable}{\g_@@_in_mc_bool}
% This boolean records if a mc is open, to test nesting.
%    \begin{macrocode}
\bool_new:N \g_@@_in_mc_bool
%    \end{macrocode}
% \end{variable}
% \begin{variable}{\g_@@_mc_parenttree_prop}
% For every chunk we need to know the structure it is in, to
% record this in the parent tree. We store this in a property.\\
% key: absolute number of the mc (tagmcabs)\\
% value: the structure number the mc is in
%    \begin{macrocode}
\@@_prop_new_linked:N \g_@@_mc_parenttree_prop
%    \end{macrocode}
% \end{variable}
%
%\begin{variable}{\g_@@_mc_stack_seq}
% Some commands (e.g. links) want to close a previous mc and reopen it after
% they did their work.
% For this we create a stack:
%    \begin{macrocode}
\seq_new:N \g_@@_mc_stack_seq
%    \end{macrocode}
% \end{variable}
%
%\begin{variable}{\l_@@_mc_artifact_type_tl}
% Artifacts can have various types like Pagination or Layout. This is stored
% in this variable.
%    \begin{macrocode}
\tl_new:N \l_@@_mc_artifact_type_tl
%    \end{macrocode}
% \end{variable}
%
%\begin{variable}{\l_@@_mc_key_stash_bool,\l_@@_mc_artifact_bool}
% These booleans store the stash and artifact status of the mc-chunk.
%    \begin{macrocode}
\bool_new:N \l_@@_mc_key_stash_bool
\bool_new:N \l_@@_mc_artifact_bool
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}
%  {
%    \l_@@_mc_key_tag_tl,
%    \g_@@_mc_key_tag_tl,
%    \l_@@_mc_key_label_tl,
%    \l_@@_mc_key_properties_tl
%  }
% Variables used by the keys. |\l_@@_mc_key_properties_tl|
% will collect a number of values. TODO: should this be a pdfdict now?
%    \begin{macrocode}
\tl_new:N \l_@@_mc_key_tag_tl
\tl_new:N \g_@@_mc_key_tag_tl
\tl_new:N \l_@@_mc_key_label_tl
\tl_new:N \l_@@_mc_key_properties_tl
%    \end{macrocode}
% \end{variable}
%
%
% \subsection{Functions}
%
% \begin{macro}{\@@_mc_handle_mc_label:e}
% The command labels a mc-chunk. It is used if the user explicitly
% labels the mc-chunk with the |label| key. The argument is the
% value provided by the user. It stores the attributes\\
% |tagabspage|: the absolute page, |\g_shipout_readonly_int|,\\
% |tagmcabs|: the absolute mc-counter |\c@g_@@_MCID_abs_int|.
% The reference command is based on l3ref.
%    \begin{macrocode}
\cs_new:Npn \@@_mc_handle_mc_label:e #1
  {
    \@@_property_record:en{tagpdf-#1}{tagabspage,tagmcabs}
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_mc_set_label_used:n}
% Unlike with structures we can't check if a labeled mc has been used by
% looking at the P key, so we use a dedicated csname for the test.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_mc_set_label_used:n #1 %#1 labelname
  {
    \tl_new:c { g_@@_mc_label_\tl_to_str:n{#1}_used_tl }
  }
%</shared>
%    \end{macrocode}
%
% \end{macro}
% \begin{macro}{\tag_mc_use:n}
% This command allows recording a marked content that was stashed away before
% into the current structure. A marked content can be used only once --
% the command will issue a warning if an mc is used a second time.
% The argument is a label name set with the |label| key.
%
% TODO: is testing for struct the right test?
%    \begin{macrocode}
%<base>\cs_new_protected:Npn \tag_mc_use:n #1 { \@@_whatsits: }
%<*shared>
\cs_set_protected:Npn \tag_mc_use:n #1 %#1: label name
  {
    \@@_check_if_active_struct:T
      {
        \tl_set:Ne \l_@@_tmpa_tl { \property_ref:nnn{tagpdf-#1}{tagmcabs}{} }
        \tl_if_empty:NTF\l_@@_tmpa_tl
          {
            \msg_warning:nnn {tag} {mc-label-unknown} {#1}
          }
          {
            \cs_if_free:cTF { g_@@_mc_label_\tl_to_str:n{#1}_used_tl }
              {
                \@@_mc_handle_stash:e { \l_@@_tmpa_tl }
                \@@_mc_set_label_used:n {#1}
              }
              {
                \msg_warning:nnn {tag}{mc-used-twice}{#1}
              }
          }
      }
  }
%</shared>
%    \end{macrocode}
% \end{macro}
% \begin{macro}
%  {
%    \tag_mc_artifact_group_begin:n,
%    \tag_mc_artifact_group_end:
%  }
% This opens an artifact of the type given in the argument,
% and then stops all tagging. It creates a group.
% It pushes and pops mc-chunks at the begin and end.
%    \begin{macrocode}
%<base>\cs_new_protected:Npn \tag_mc_artifact_group_begin:n #1 {}
%<base>\cs_new_protected:Npn \tag_mc_artifact_group_end:{}
%<*shared>
\cs_set_protected:Npn \tag_mc_artifact_group_begin:n #1
  {
    \tag_mc_end_push:
    \tag_mc_begin:n {artifact=#1}
    \group_begin:
    \tag_suspend:n{artifact-group}
  }

\cs_set_protected:Npn \tag_mc_artifact_group_end:
  {
    \tag_resume:n{artifact-group}
    \group_end:
    \tag_mc_end:
    \tag_mc_begin_pop:n{}
  }
%</shared>
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\tag_mc_reset_box:N}
% This allows to reset the mc-attributes in box. In base and generic mode it should do
% nothing.
%    \begin{macrocode}
%<base>\cs_new_protected:Npn \tag_mc_reset_box:N #1 {}
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\tag_mc_end_push:, \tag_mc_begin_pop:n}
% The base versions do nothing. The shared versions only act inside
% |\@@_check_if_active_mc:T|.
% |\tag_mc_end_push:| pushes the current |mc_tag| on |\g_@@_mc_stack_seq|
% and ends the chunk if |\@@_mc_if_in:TF| takes its true branch,
% otherwise it pushes $-1$.
% |\tag_mc_begin_pop:n| pops a value; unless it is $-1$ a new chunk is
% begun with this value as |tag| followed by the keys from the argument.
% With an empty stack nothing is opened.
% Every push and pop is reported through |\@@_check_mc_pushed_popped:nn|.
%
%    \begin{macrocode}
%<base>\cs_new_protected:Npn \tag_mc_end_push: {}
%<base>\cs_new_protected:Npn \tag_mc_begin_pop:n #1 {}
%<*shared>
\cs_set_protected:Npn \tag_mc_end_push:
  {
    \@@_check_if_active_mc:T
      {
        \@@_mc_if_in:TF
          {
            \seq_gpush:Ne \g_@@_mc_stack_seq { \tag_get:n {mc_tag} }
            \@@_check_mc_pushed_popped:nn
              { pushed }
              { \tag_get:n {mc_tag} }
            \tag_mc_end:
          }
          {
            \seq_gpush:Nn \g_@@_mc_stack_seq {-1}
            \@@_check_mc_pushed_popped:nn { pushed }{-1}
          }
      }
  }

\cs_set_protected:Npn \tag_mc_begin_pop:n #1
  {
    \@@_check_if_active_mc:T
      {
        \seq_gpop:NNTF \g_@@_mc_stack_seq \l_@@_tmpa_tl
          {
            \tl_if_eq:NnTF \l_@@_tmpa_tl {-1}
              {
                \@@_check_mc_pushed_popped:nn {popped}{-1}
              }
              {
                \@@_check_mc_pushed_popped:nn {popped}{\l_@@_tmpa_tl}
                \tag_mc_begin:n {tag=\l_@@_tmpa_tl,#1}
              }
          }
          {
            \@@_check_mc_pushed_popped:nn {popped}{empty~stack,~nothing}
          }
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Keys}
% These are the keys where the code can be shared between the modes.
%
% \begin{macro}{stash (mc-key),__artifact-bool,__artifact-type}
% The two internal artifact keys are used to define the public |artifact|.
% For now we add support for the subtypes Header and Footer.
% Watermark,PageNum, LineNum,Redaction,Bates will be added if some use case
% emerges.
% If some use case for /BBox and /Attached emerges, it will perhaps be
% necessary to adapt the code.
%    \begin{macrocode}
\keys_define:nn { @@ / mc }
  {
    stash           .bool_set:N = \l_@@_mc_key_stash_bool,
    __artifact-bool .bool_set:N = \l_@@_mc_artifact_bool,
    __artifact-type .choice:,
    __artifact-type / pagination .code:n =
      {
        \tl_set:Nn \l_@@_mc_artifact_type_tl { Pagination }
      },
    __artifact-type / pagination/header .code:n =
      {
        \tl_set:Nn \l_@@_mc_artifact_type_tl { Pagination/Subtype/Header }
      },
    __artifact-type / pagination/footer .code:n =
      {
        \tl_set:Nn \l_@@_mc_artifact_type_tl { Pagination/Subtype/Footer }
      },
    __artifact-type / layout .code:n =
      {
        \tl_set:Nn \l_@@_mc_artifact_type_tl { Layout }
      },
    __artifact-type / page .code:n =
      {
        \tl_set:Nn \l_@@_mc_artifact_type_tl { Page }
      },
    __artifact-type / background .code:n =
      {
        \tl_set:Nn \l_@@_mc_artifact_type_tl { Background }
      },
    __artifact-type / notype .code:n =
      {
        \tl_set:Nn \l_@@_mc_artifact_type_tl {}
      },
    __artifact-type / .code:n =
      {
        \tl_set:Nn \l_@@_mc_artifact_type_tl {}
      },
  }
%    \end{macrocode}
% \end{macro}
%    \begin{macrocode}
%</shared>
%    \end{macrocode}
% \end{implementation}
% \PrintIndex