% \iffalse meta-comment
%
%% File: tagpdf-mc-shared.dtx
%
% Copyright (C) 2019-2025 Ulrike Fischer
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version. The latest version
% of this license is in the file
%
% https://www.latex-project.org/lppl.txt
%
% This file is part of the "tagpdf bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
% https://github.com/latex3/tagpdf
%
% for those people who are interested.
%<*driver>
% Driver: typesets the documentation of this file when the dtx is
% compiled directly; the file reads itself through \DocInput.
\DocumentMetadata{}
\documentclass{l3doc}
\usepackage{array,booktabs,caption}
\hypersetup{pdfauthor=Ulrike Fischer,
 pdftitle=tagpdf-mc module (tagpdf)}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
% \title{^^A
% The \pkg{tagpdf-mc-shared} module\\ Code related to Marked Content (mc-chunks), code shared by all modes ^^A
% \\ Part of the tagpdf package
% }
%
% \author{^^A
% Ulrike Fischer\thanks
% {^^A
% E-mail:
% \href{mailto:fischer@troubleshooting-tex.de}
% {fischer@troubleshooting-tex.de}^^A
% }^^A
% }
%
% \date{Version 0.99l, released 2025-01-12}
% \maketitle
% \begin{documentation}
% \section{Public Commands}
% \begin{function}{\tag_mc_begin:n,\tag_mc_end:}
% \begin{syntax}
% \cs{tag_mc_begin:n} \Arg{key-values}\\
% \cs{tag_mc_end:}
% \end{syntax}
% These commands insert the begin and end code of the marked content.
% They don't end a group, and in generic mode it doesn't matter
% if they are in a different group than the starting commands.
% In generic mode both commands check if they are correctly nested
% and issue a warning if not.
% \end{function}
%
% \begin{function}{\tag_mc_use:n}
% \begin{syntax}
% \cs{tag_mc_use:n} \Arg{label}
% \end{syntax}
% This command allows to record a marked content that was stashed away before
% into the current structure. A marked content can be used only once --
% the command will issue a warning if an mc is used a second time.
% \end{function}
%
% \begin{function}[added = 2019-11-20]
% {
% \tag_mc_artifact_group_begin:n, \tag_mc_artifact_group_end:
% }
% \begin{syntax}
% \cs{tag_mc_artifact_group_begin:n} \Arg{name}\\
% \cs{tag_mc_artifact_group_end:}
% \end{syntax}
% This command pair creates a group with an artifact marker at the begin
% and the end. Inside the group the tagging commands are disabled.
% It allows to mark a complete region as artifact without having to worry
% about user commands with tagging commands.
% \meta{name} should be a value allowed also for the |artifact| key.
% It pushes and pops mc-chunks at the begin and end.
% TODO: document is in tagpdf.tex
% \end{function}
%
% \begin{function}[added = 2021-04-22]
% {
% \tag_mc_end_push:, \tag_mc_begin_pop:n
% }
% \begin{syntax}
% \cs{tag_mc_end_push:} \\
% \cs{tag_mc_begin_pop:n} \Arg{key-values}
% \end{syntax}
% If there is an open mc chunk,
% \cs{tag_mc_end_push:} ends it and pushes its tag on the (global) stack.
% If there is no open chunk, it puts $-1$ on the stack (for debugging).
% \cs{tag_mc_begin_pop:n} removes a value from the stack. If it is different from
% $-1$ it opens a tag with it.
% The reopened mc chunk loses info like the alt text for now.
% \end{function}
%
% \begin{function}[pTF, EXP]{\tag_mc_if_in:}
% \begin{syntax}
% \cs{tag_mc_if_in:TF} \Arg{true code} \Arg{false code}
% \end{syntax}
% Determines if a mc-chunk is open.
% \end{function}
%
% \begin{function}[ EXP,added=2023-06-11]{\tag_mc_reset_box:N}
% \begin{syntax}
% \cs{tag_mc_reset_box:N} \meta{box}
% \end{syntax}
% This resets in lua mode the mc attributes to the one currently in use.
% It does nothing in generic mode.
% \end{function}
%
% \begin{function}[added=2024-11-18]{\tag_mc_add_missing_to_stream:Nn}
% \begin{syntax}
% \cs{tag_mc_add_missing_to_stream:Nn} \meta{box} \Arg{stream name}
% \end{syntax}
% This command is only needed in generic mode, in lua mode it gobbles its arguments.
% In generic mode it adds MC literals to the stream that are missing because of
% page breaks.
% The first argument is the box with the stream, the second a
% string representing the stream. Predeclared are the names
% \texttt{main}, \texttt{footnote} and \texttt{multicol}.
% If more streams should be handled the underlying interface must be enabled
% with \cs{tag_mc_new_stream:n}.
% The command is only for packages doing deep manipulations
% of the output routine!
% Examples of use are in the multicol package and in tagpdf itself.
% \end{function}
%
% \begin{function}[added=2024-11-18]{\tag_mc_new_stream:n}
% \begin{syntax}
% \cs{tag_mc_new_stream:n} \Arg{stream name}
% \end{syntax}
% This declares the interface needed to handle
% a new stream with \cs{tag_mc_add_missing_to_stream:Nn}.
% Predeclared are the names \texttt{main}, \texttt{footnote} and \texttt{multicol}.
% \end{function}
%
% \section{Public keys}
% The following keys can be used with |\tag_mc_begin:n|, |\tagmcbegin|,
% and |\tag_mc_begin_pop:n|.
%
% \begin{function}{tag (mc-key)}
% This key is required, unless artifact is used.
% The value is a tag like |P| or |H1| without a slash at the begin,
% this is added by the code.
% It is possible to setup new tags.
% The value of the key is expanded, so it can be a command.
% The expansion is passed unchanged to the PDF,
% so it should with a starting slash give a valid PDF name
% (some ascii with numbers like \texttt{H4} is fine).
% \end{function}
%
% \begin{function}{artifact (mc-key)}
% This will set up the marked content as an artifact. The key should be used
% for content that should be ignored.
% The key can take one of the values |pagination|,
% |pagination/header|, |pagination/footer|,
% |layout|, |page|, |background| and |notype|
% (this is the default).
% \end{function}
%
% \begin{function}{raw (mc-key)}
% This key allows to add more entries to the properties dictionary.
% The value must be correct, low-level PDF. E.g.
% \verb+raw=/Alt (Hello)+ will insert an alternative Text.
% \end{function}
%
% \begin{function}{alt (mc-key)}
% This key inserts an \texttt{/Alt} value in the property dictionary of the BDC operator.
% The value is handled as verbatim string, commands are not expanded.
% The value will be expanded first once. If it is empty, nothing will happen.
% \end{function}
%
% \begin{function}{actualtext (mc-key)}
% This key inserts an \texttt{/ActualText} value in the property dictionary
% of the BDC operator. The value is handled as verbatim string,
% commands are not expanded.
% The value will be expanded first once. If it is empty, nothing will happen.
% \end{function}
%
% \begin{function}{label (mc-key)}
% This key sets a label by which one can call the marked content
% later in another structure
% (if it has been stashed with the |stash| key).
% Internally the label name will start with \texttt{tagpdf-}.
% \end{function}
%
% \begin{function}{stash (mc-key)}
% This \enquote{stashes} an mc-chunk: it is not inserted into the current structure.
% It should normally be used along with a label to be able to use the mc-chunk
% in another place.
% \end{function}
% \end{documentation}
% \begin{implementation}
% The code is split into three parts: code shared by all engines,
% code specific to luamode and code not used by luamode.
% \section{Marked content code -- shared}
% The internal prefix |@@| is mapped to |tag| for docstrip, then the
% generated package file is identified.
%    \begin{macrocode}
%<@@=tag>
%<*header>
\ProvidesExplPackage {tagpdf-mc-code-shared} {2025-01-12} {0.99l}
 {part of tagpdf - code related to marking chunks -
  code shared by generic and luamode }
%</header>
%    \end{macrocode}
%
% \subsection{Variables and counters}
% MC chunks must be counted.
% I use a latex counter for the absolute count, so that it is added to
% |\cl@@ckpt| and restored e.g. in tabulars and align.
% |\int_new:N \c@g_@@_MCID_abs_int| and
% |\tl_put_right:Nn\cl@@ckpt{\@elt{g_@@_MCID_abs_int}}|
% would work too, but as the name is not expl3 then too, why bother?
% The absolute counter can be used to label and to check if the page
% counter needs a reset.
%
% \begin{variable}{g_@@_MCID_abs_int}
% The \LaTeX{} counter that holds the absolute count of the mc-chunks.
% As it is declared with \cs{newcounter} its value is accessed
% through |\c@g_@@_MCID_abs_int|.
%    \begin{macrocode}
%<*base>
\newcounter { g_@@_MCID_abs_int }
%    \end{macrocode}
% \end{variable}
% \begin{macro}{\@@_get_data_mc_counter:}
% This command allows \cs{tag_get:n} to get the current
% state of the mc counter with the keyword |mc_counter|.
% By comparing the numbers it can be used to check the number of
% structure commands in a piece of code.
% It expands to the current value of |\c@g_@@_MCID_abs_int|.
% The definition ends the |base| part that was opened at the
% declaration of the counter.
%    \begin{macrocode}
\cs_new:Npn \@@_get_data_mc_counter:
  {
    \int_use:N \c@g_@@_MCID_abs_int
  }
%</base>
%    \end{macrocode}
% \end{macro}
%
%
% \begin{macro}{\@@_get_mc_abs_cnt:}
% An expandable function that returns the current value of the
% absolute mc counter |\c@g_@@_MCID_abs_int|.
% TODO: duplicate of \cs{@@_get_data_mc_counter:}, this should be cleaned up.
%    \begin{macrocode}
%<*shared>
\cs_new:Npn \@@_get_mc_abs_cnt:
  {
    \int_use:N \c@g_@@_MCID_abs_int
  }
%    \end{macrocode}
% \end{macro}
%
%
% \begin{variable}{\g_@@_in_mc_bool}
% This boolean records if a mc is open, to test nesting.
%    \begin{macrocode}
\bool_new:N \g_@@_in_mc_bool
%    \end{macrocode}
% \end{variable}
% \begin{variable}{\g_@@_mc_parenttree_prop}
% For every chunk we need to know the structure it is in, to
% record this in the parent tree. We store this in a property list.\\
% key: absolute number of the mc (tagmcabs)\\
% value: the structure number the mc is in
%    \begin{macrocode}
\@@_prop_new_linked:N \g_@@_mc_parenttree_prop
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_mc_stack_seq}
% Some commands (e.g. links) want to close a previous mc and reopen it after
% they did their work. For this we create a stack:
%    \begin{macrocode}
\seq_new:N \g_@@_mc_stack_seq
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_mc_artifact_type_tl}
% Artifacts can have various types like Pagination or Layout. This is stored
% in this variable.
%    \begin{macrocode}
\tl_new:N \l_@@_mc_artifact_type_tl
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_mc_key_stash_bool,\l_@@_mc_artifact_bool}
% These booleans store the stash and artifact status of the mc-chunk.
%    \begin{macrocode}
\bool_new:N \l_@@_mc_key_stash_bool
\bool_new:N \l_@@_mc_artifact_bool
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}
% {
% \l_@@_mc_key_tag_tl,
% \g_@@_mc_key_tag_tl,
% \l_@@_mc_key_label_tl,
% \l_@@_mc_key_properties_tl
% }
% Variables used by the keys. |\l_@@_mc_key_properties_tl|
% will collect a number of values. TODO: should this be a pdfdict now?
%    \begin{macrocode}
\tl_new:N \l_@@_mc_key_tag_tl
\tl_new:N \g_@@_mc_key_tag_tl
\tl_new:N \l_@@_mc_key_label_tl
\tl_new:N \l_@@_mc_key_properties_tl
%    \end{macrocode}
% \end{variable}
%
%
% \subsection{Functions}
%
% \begin{macro}{\@@_mc_handle_mc_label:e}
% The command labels a mc-chunk. It is used if the user explicitly
% labels the mc-chunk with the |label| key. The argument is the
% value provided by the user; the label is recorded under the name
% \texttt{tagpdf-}\meta{value}. It stores the attributes\\
% |tagabspage|: the absolute page, |\g_shipout_readonly_int|,\\
% |tagmcabs|: the absolute mc-counter |\c@g_@@_MCID_abs_int|.
% The reference command is based on l3ref.
%
% NOTE(review): the function is defined with \cs{cs_new:Npn}; recording a
% property is presumably not expandable, so it should be checked whether
% \cs{cs_new_protected:Npn} is meant here.
%    \begin{macrocode}
\cs_new:Npn \@@_mc_handle_mc_label:e #1
  {
    \@@_property_record:en
      { tagpdf-#1 }
      { tagabspage, tagmcabs }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_mc_set_label_used:n}
% Unlike with structures we can't check if a labeled mc has been used by
% looking at the P key, so we use a dedicated csname for the test:
% the function declares the variable
% |\g_@@_mc_label_|\meta{label name}|_used_tl|. Only the existence
% of this name matters, \cs{tag_mc_use:n} tests it with \cs{cs_if_free:cTF}.
% The argument is the label name.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_mc_set_label_used:n #1 %#1 labelname
  {
    \tl_new:c { g_@@_mc_label_\tl_to_str:n{#1}_used_tl }
  }
%</shared>
%    \end{macrocode}
%
% \end{macro}
% \begin{macro}{\tag_mc_use:n}
% This command allows to record a marked content that was stashed away before
% into the current structure. A marked content can be used only once --
% the command will issue a warning if an mc is used a second time.
% The argument is a label name set with the |label| key.
% If no |tagmcabs| value can be retrieved for the label, a warning
% about an unknown label is issued and nothing else happens.
% Otherwise the stashed mc is handled and the label is marked as used.
%
% TODO: is testing for struct the right test?
%    \begin{macrocode}
%<base>\cs_new_protected:Npn \tag_mc_use:n #1 { \@@_whatsits: }
%<*shared>
\cs_set_protected:Npn \tag_mc_use:n #1 %#1: label name
  {
    \@@_check_if_active_struct:T
      {
        \tl_set:Ne \l_@@_tmpa_tl { \property_ref:nnn{tagpdf-#1}{tagmcabs}{} }
        \tl_if_empty:NTF\l_@@_tmpa_tl
          {
            \msg_warning:nnn {tag} {mc-label-unknown} {#1}
          }
          {
            \cs_if_free:cTF { g_@@_mc_label_\tl_to_str:n{#1}_used_tl }
              {
                \@@_mc_handle_stash:e { \l_@@_tmpa_tl }
                \@@_mc_set_label_used:n {#1}
              }
              {
                \msg_warning:nnn {tag}{mc-used-twice}{#1}
              }
          }
      }
  }
%</shared>
%    \end{macrocode}
% \end{macro}
% \begin{macro}
% {
% \tag_mc_artifact_group_begin:n,
% \tag_mc_artifact_group_end:
% }
% This opens an artifact of the type given in the argument,
% and then stops all tagging. It creates a group.
% It pushes and pops mc-chunks at the begin and end.
% The group is opened after the artifact mc has been started and
% closed before it is ended, so the suspension of tagging is inside
% the group. The end command reopens a pushed mc without additional keys.
% In the base part both commands are defined as no-ops.
%    \begin{macrocode}
%<base>\cs_new_protected:Npn \tag_mc_artifact_group_begin:n #1 {}
%<base>\cs_new_protected:Npn \tag_mc_artifact_group_end:{}
%<*shared>
\cs_set_protected:Npn \tag_mc_artifact_group_begin:n #1
  {
    \tag_mc_end_push:
    \tag_mc_begin:n {artifact=#1}
    \group_begin:
    \tag_suspend:n{artifact-group}
  }
\cs_set_protected:Npn \tag_mc_artifact_group_end:
  {
    \tag_resume:n{artifact-group}
    \group_end:
    \tag_mc_end:
    \tag_mc_begin_pop:n{}
  }
%</shared>
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\tag_mc_reset_box:N}
% This allows to reset the mc-attributes in a box. In base and generic mode
% it should do nothing, so the base part defines it as a no-op that
% gobbles its argument.
%    \begin{macrocode}
%<base>\cs_new_protected:Npn \tag_mc_reset_box:N #1 {}
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\tag_mc_end_push:, \tag_mc_begin_pop:n}
% Both commands act only if the check \cs{@@_check_if_active_mc:T}
% succeeds. In the base part they are defined as no-ops.
%
% \cs{tag_mc_end_push:}: if an mc is open, the value of
% |\tag_get:n{mc_tag}| is pushed on |\g_@@_mc_stack_seq| and the mc is
% ended; if no mc is open, $-1$ is pushed instead.
%
% \cs{tag_mc_begin_pop:n}: pops a value from |\g_@@_mc_stack_seq|.
% If the value is $-1$ no mc is opened, otherwise an mc is begun with the
% popped value as |tag| followed by the keys given in the argument.
% If the stack is empty nothing is opened either.
% All cases are passed to \cs{@@_check_mc_pushed_popped:nn}
% (presumably for debug messages).
%    \begin{macrocode}
%<base>\cs_new_protected:Npn \tag_mc_end_push: {}
%<base>\cs_new_protected:Npn \tag_mc_begin_pop:n #1 {}
%<*shared>
\cs_set_protected:Npn \tag_mc_end_push:
  {
    \@@_check_if_active_mc:T
      {
        \@@_mc_if_in:TF
          {
            \seq_gpush:Ne \g_@@_mc_stack_seq { \tag_get:n {mc_tag} }
            \@@_check_mc_pushed_popped:nn
              { pushed }
              { \tag_get:n {mc_tag} }
            \tag_mc_end:
          }
          {
            \seq_gpush:Nn \g_@@_mc_stack_seq {-1}
            \@@_check_mc_pushed_popped:nn { pushed }{-1}
          }
      }
  }
\cs_set_protected:Npn \tag_mc_begin_pop:n #1
  {
    \@@_check_if_active_mc:T
      {
        \seq_gpop:NNTF \g_@@_mc_stack_seq \l_@@_tmpa_tl
          {
            \tl_if_eq:NnTF \l_@@_tmpa_tl {-1}
              {
                \@@_check_mc_pushed_popped:nn {popped}{-1}
              }
              {
                \@@_check_mc_pushed_popped:nn {popped}{\l_@@_tmpa_tl}
                \tag_mc_begin:n {tag=\l_@@_tmpa_tl,#1}
              }
          }
          {
            \@@_check_mc_pushed_popped:nn {popped}{empty~stack,~nothing}
          }
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Keys}
% These are the keys where the code can be shared between the modes.
%
% \begin{macro}{stash (mc-key),__artifact-bool,__artifact-type}
% The |stash| key and |__artifact-bool| set the booleans
% |\l_@@_mc_key_stash_bool| and |\l_@@_mc_artifact_bool|.
% |__artifact-type| is a choice key which stores the PDF artifact type in
% |\l_@@_mc_artifact_type_tl|; the choice |notype| and the empty choice
% leave the variable empty.
% The two internal artifact keys are used to define the public |artifact| key.
% For now we add support for the subtypes Header and Footer.
% Watermark, PageNum, LineNum, Redaction, Bates will be added if some use case
% emerges. If some use case for /BBox and /Attached emerges, it will perhaps
% be necessary to adapt the code.
%    \begin{macrocode}
\keys_define:nn { @@ / mc }
  {
    stash           .bool_set:N = \l_@@_mc_key_stash_bool,
    __artifact-bool .bool_set:N = \l_@@_mc_artifact_bool,
    __artifact-type .choice:,
    __artifact-type / pagination .code:n =
      { \tl_set:Nn \l_@@_mc_artifact_type_tl { Pagination } },
    __artifact-type / pagination/header .code:n =
      { \tl_set:Nn \l_@@_mc_artifact_type_tl { Pagination/Subtype/Header } },
    __artifact-type / pagination/footer .code:n =
      { \tl_set:Nn \l_@@_mc_artifact_type_tl { Pagination/Subtype/Footer } },
    __artifact-type / layout .code:n =
      { \tl_set:Nn \l_@@_mc_artifact_type_tl { Layout } },
    __artifact-type / page .code:n =
      { \tl_set:Nn \l_@@_mc_artifact_type_tl { Page } },
    __artifact-type / background .code:n =
      { \tl_set:Nn \l_@@_mc_artifact_type_tl { Background } },
    __artifact-type / notype .code:n =
      { \tl_clear:N \l_@@_mc_artifact_type_tl },
    __artifact-type / .code:n =
      { \tl_clear:N \l_@@_mc_artifact_type_tl },
  }
%    \end{macrocode}
% \end{macro}
% End of the code of the |shared| part.
%    \begin{macrocode}
%</shared>
%    \end{macrocode}
% \end{implementation}
% \PrintIndex