% \iffalse meta-comment % %% File: l3str.dtx % % Copyright (C) 2011-2024 The LaTeX Project % % It may be distributed and/or modified under the conditions of the % LaTeX Project Public License (LPPL), either version 1.3c of this % license or (at your option) any later version. The latest version % of this license is in the file % % https://www.latex-project.org/lppl.txt % % This file is part of the "l3kernel bundle" (The Work in LPPL) % and all files in that bundle must be distributed together. % % ----------------------------------------------------------------------- % % The development version of the bundle can be found at % % https://github.com/latex3/latex3 % % for those people who are interested. % %<*driver> \documentclass[full,kernel]{l3doc} \begin{document} \DocInput{\jobname.dtx} \end{document} %</driver> % \fi % % \title{^^A % The \pkg{l3str} module\\ Strings^^A % } % % \author{^^A % The \LaTeX{} Project\thanks % {^^A % E-mail: % \href{mailto:latex-team@latex-project.org} % {latex-team@latex-project.org}^^A % }^^A % } % % \date{Released 2024-12-25} % % \maketitle % % \begin{documentation} % % \TeX{} associates each character with a category code: as such, there is no % concept of a \enquote{string} as commonly understood in many other % programming languages. However, there are places where we wish to manipulate % token lists while in some sense \enquote{ignoring} category codes: this is % done by treating token lists as strings in a \TeX{} sense. % % A \TeX{} string (and thus an \pkg{expl3} string) is a series of characters % which have category code $12$ (\enquote{other}) with the exception of % space characters which have category code $10$ (\enquote{space}). Thus % at a technical level, a \TeX{} string is a token list with the appropriate % category codes. In this documentation, these are simply referred to as % strings. % % String variables are simply specialised token lists, but by convention % should be named with the suffix \texttt{\ldots{}str}. 
Such variables % should contain characters with category code $12$ (other), except % spaces, which have category code $10$ (blank space). All the % functions in this module which accept a token list argument first % convert it to a string using \cs{tl_to_str:n} for internal processing, % and do not treat a token list or the corresponding string % representation differently. % % As a string is a subset of the more general token list, it is sometimes unclear % when one should be used over the other. % Use a string variable for data that isn't primarily intended for typesetting % and for which a level of protection from unwanted expansion is suitable. % This data type simplifies comparison of variables since there are no concerns % about expansion of their contents. % % The functions \cs{cs_to_str:N}, \cs{tl_to_str:n}, \cs{tl_to_str:N} and % \cs{token_to_str:N} (and variants) generate strings from the appropriate % input: these are documented in \pkg{l3basics}, \pkg{l3tl} and \pkg{l3token}, % respectively. % % Most expandable functions in this module come in three flavours: % \begin{itemize} % \item \cs[no-index]{str_\ldots{}:N}, which expect a token list or string % variable as their argument; % \item \cs[no-index]{str_\ldots{}:n}, taking any token list (or string) as an % argument; % \item \cs[no-index]{str_\ldots{}_ignore_spaces:n}, which ignores any space % encountered during the operation: these functions are typically % faster than those which take care of escaping spaces % appropriately. % \end{itemize} % % \section{Creating and initialising string variables} % % \begin{function}[added = 2015-09-18]{\str_new:N, \str_new:c} % \begin{syntax} % \cs{str_new:N} \meta{str~var} % \end{syntax} % Creates a new \meta{str~var} or raises an error if the name is % already taken. The declaration is global. The \meta{str~var} is % initially empty. 
% \end{function} % % \begin{function}[added = 2015-09-18, updated = 2018-07-28] % { % \str_const:Nn, \str_const:NV, \str_const:Ne, % \str_const:cn, \str_const:cV, \str_const:ce % } % \begin{syntax} % \cs{str_const:Nn} \meta{str~var} \Arg{token list} % \end{syntax} % Creates a new constant \meta{str~var} or raises an error if the name % is already taken. The value of the \meta{str~var} is set % globally to the \meta{token list}, converted to a string. % \end{function} % % \begin{function}[added = 2015-09-18] % {\str_clear:N, \str_clear:c, \str_gclear:N, \str_gclear:c} % \begin{syntax} % \cs{str_clear:N} \meta{str~var} % \end{syntax} % Clears the content of the \meta{str~var}. % \end{function} % % \begin{function}[added = 2015-09-18] % { % \str_clear_new:N, \str_clear_new:c, % \str_gclear_new:N, \str_gclear_new:c % } % \begin{syntax} % \cs{str_clear_new:N} \meta{str~var} % \end{syntax} % Ensures that the \meta{str~var} exists globally by applying % \cs{str_new:N} if necessary, then applies % \cs[index=str_clear:N]{str_(g)clear:N} to leave % the \meta{str~var} empty. % \end{function} % % \begin{function}[added = 2015-09-18] % { % \str_set_eq:NN, \str_set_eq:cN, \str_set_eq:Nc, \str_set_eq:cc, % \str_gset_eq:NN, \str_gset_eq:cN, \str_gset_eq:Nc, \str_gset_eq:cc % } % \begin{syntax} % \cs{str_set_eq:NN} \meta{str~var_1} \meta{str~var_2} % \end{syntax} % Sets the content of \meta{str~var_1} equal to that of % \meta{str~var_2}. % \end{function} % % \begin{function}[added = 2017-10-08] % { % \str_concat:NNN, \str_concat:ccc, % \str_gconcat:NNN, \str_gconcat:ccc % } % \begin{syntax} % \cs{str_concat:NNN} \meta{str~var_1} \meta{str~var_2} \meta{str~var_3} % \end{syntax} % Concatenates the content of \meta{str~var_2} and \meta{str~var_3} % together and saves the result in \meta{str~var_1}. The \meta{str~var_2} % is placed at the left side of the new string variable. 
% The \meta{str~var_2} and \meta{str~var_3} must indeed be strings, as % this function does not convert their contents to a string. % \end{function} % % \begin{function}[EXP, pTF, added = 2015-09-18] % {\str_if_exist:N, \str_if_exist:c} % \begin{syntax} % \cs{str_if_exist_p:N} \meta{str~var} % \cs{str_if_exist:NTF} \meta{str~var} \Arg{true code} \Arg{false code} % \end{syntax} % Tests whether the \meta{str~var} is currently defined. This does not % check that the \meta{str~var} really is a string. % \end{function} % % \section{Adding data to string variables} % % \begin{function}[added = 2015-09-18, updated = 2018-07-28] % { % \str_set:Nn, \str_set:NV, \str_set:Ne, % \str_set:cn, \str_set:cV, \str_set:ce, % \str_gset:Nn, \str_gset:NV, \str_gset:Ne, % \str_gset:cn, \str_gset:cV, \str_gset:ce % } % \begin{syntax} % \cs{str_set:Nn} \meta{str var} \Arg{token list} % \end{syntax} % Converts the \meta{token list} to a \meta{string}, and stores the % result in \meta{str var}. % \end{function} % % \begin{function}[added = 2015-09-18, updated = 2018-07-28] % { % \str_put_left:Nn, \str_put_left:NV, \str_put_left:Ne, % \str_put_left:cn, \str_put_left:cV, \str_put_left:ce, % \str_gput_left:Nn, \str_gput_left:NV, \str_gput_left:Ne, % \str_gput_left:cn, \str_gput_left:cV, \str_gput_left:ce % } % \begin{syntax} % \cs{str_put_left:Nn} \meta{str var} \Arg{token list} % \end{syntax} % Converts the \meta{token list} to a \meta{string}, and prepends the % result to \meta{str var}. The current contents of the \meta{str % var} are not automatically converted to a string. 
% \end{function} % % \begin{function}[added = 2015-09-18, updated = 2018-07-28] % { % \str_put_right:Nn, \str_put_right:NV, \str_put_right:Ne, % \str_put_right:cn, \str_put_right:cV, \str_put_right:ce, % \str_gput_right:Nn, \str_gput_right:NV, \str_gput_right:Ne, % \str_gput_right:cn, \str_gput_right:cV, \str_gput_right:ce % } % \begin{syntax} % \cs{str_put_right:Nn} \meta{str var} \Arg{token list} % \end{syntax} % Converts the \meta{token list} to a \meta{string}, and appends the % result to \meta{str var}. The current contents of the \meta{str % var} are not automatically converted to a string. % \end{function} % % \section{String conditionals} % % \begin{function}[EXP,pTF, added = 2015-09-18, updated = 2022-03-21] % {\str_if_empty:N, \str_if_empty:c, \str_if_empty:n} % \begin{syntax} % \cs{str_if_empty_p:N} \meta{str~var} % \cs{str_if_empty:NTF} \meta{str~var} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{string variable} is entirely empty % (\emph{i.e.}~contains no characters at all). % \end{function} % % \begin{function}[EXP,pTF, added = 2015-09-18] % {\str_if_eq:NN, \str_if_eq:Nc, \str_if_eq:cN, \str_if_eq:cc} % \begin{syntax} % \cs{str_if_eq_p:NN} \meta{str~var_1} \meta{str~var_2} % \cs{str_if_eq:NNTF} \meta{str~var_1} \meta{str~var_2} \Arg{true code} \Arg{false code} % \end{syntax} % Compares the content of two \meta{str variables} and % is logically \texttt{true} if the two contain the same characters % in the same order. See \cs{tl_if_eq:NNTF} to compare tokens % (including their category codes) rather than characters. 
% \end{function} % % \begin{function}[EXP,pTF, updated = 2018-06-18] % { % \str_if_eq:nn, \str_if_eq:Vn, \str_if_eq:on, \str_if_eq:no, % \str_if_eq:nV, \str_if_eq:VV, \str_if_eq:vn, \str_if_eq:nv, % \str_if_eq:ee % } % \begin{syntax} % \cs{str_if_eq_p:nn} \Arg{tl_1} \Arg{tl_2} % \cs{str_if_eq:nnTF} \Arg{tl_1} \Arg{tl_2} \Arg{true code} \Arg{false code} % \end{syntax} % Compares the two \meta{token lists} on a character by character % basis (namely after converting them to strings), % and is \texttt{true} if the two \meta{strings} contain the same % characters in the same order. Thus for example % \begin{verbatim} % \str_if_eq_p:no { abc } { \tl_to_str:n { abc } } % \end{verbatim} % is logically \texttt{true}. See \cs{tl_if_eq:nnTF} to compare % tokens (including their category codes) rather than characters. % \end{function} % % \begin{function}[TF, added = 2017-10-08]{\str_if_in:Nn, \str_if_in:cn} % \begin{syntax} % \cs{str_if_in:NnTF} \meta{str~var} \Arg{token list} \Arg{true code} \Arg{false code} % \end{syntax} % Converts the \meta{token list} to a \meta{string} and % tests if that \meta{string} is found in the content of the % \meta{str~var}. % \end{function} % % \begin{function}[TF, added = 2017-10-08]{\str_if_in:nn} % \begin{syntax} % \cs{str_if_in:nnTF} \Arg{tl_1} \Arg{tl_2} \Arg{true code} \Arg{false code} % \end{syntax} % Converts both \meta{token lists} to \meta{strings} and % tests whether \meta{string_2} is found inside \meta{string_1}. 
% \end{function} % % \begin{function}[added = 2013-07-24, updated = 2022-03-21, EXP, noTF] % { % \str_case:nn, \str_case:Vn, \str_case:Nn, \str_case:on, \str_case:en, % \str_case:nV, \str_case:nv % } % \begin{syntax} % \cs{str_case:nnTF} \Arg{test string} \\ % ~~|{| \\ % ~~~~\Arg{string case_1} \Arg{code case_1} \\ % ~~~~\Arg{string case_2} \Arg{code case_2} \\ % ~~~~\ldots \\ % ~~~~\Arg{string case_n} \Arg{code case_n} \\ % ~~|}| \\ % ~~\Arg{true code} % ~~\Arg{false code} % \end{syntax} % Compares the \meta{test string} in turn with each % of the \meta{string case}s until a match is found % (all token lists are converted to strings). % If the two are equal (as described for % \cs{str_if_eq:nnTF}) then the associated \meta{code} is left in the % input stream and other cases are discarded. If any of the % cases are matched, the \meta{true code} is also inserted into the % input stream (after the code for the appropriate case), while if none % match then the \meta{false code} is inserted. The function % \cs{str_case:nn}, which does nothing if there is no match, is also % available. % % This set of functions performs no expansion on each % \meta{string~case} argument, so any variable in there will be % compared as a string. If expansion is needed in the % \meta{string~case}s, then \cs[no-index]{str_case_e:nn(TF)} should % be used instead. % \end{function} % % \begin{function}[added = 2018-06-19, EXP, noTF] % {\str_case_e:nn, \str_case_e:en} % \begin{syntax} % \cs{str_case_e:nnTF} \Arg{test string} \\ % ~~|{| \\ % ~~~~\Arg{string case_1} \Arg{code case_1} \\ % ~~~~\Arg{string case_2} \Arg{code case_2} \\ % ~~~~\ldots \\ % ~~~~\Arg{string case_n} \Arg{code case_n} \\ % ~~|}| \\ % ~~\Arg{true code} % ~~\Arg{false code} % \end{syntax} % Compares the full expansion of the \meta{test string} % in turn with the full expansion of the \meta{string case}s % (all token lists are converted to strings). 
If the two % full expansions are equal (as described for \cs{str_if_eq:eeTF}) then the % associated \meta{code} is left in the input stream % and other cases are discarded. If any of the % cases are matched, the \meta{true code} is also inserted into the % input stream (after the code for the appropriate case), while if none % match then the \meta{false code} is inserted. The function % \cs{str_case_e:nn}, which does nothing if there is no match, is also % available. % In \cs[index=str_case_e:nnTF]{str_case_e:nn(TF)}, the \meta{test string} % is expanded in each comparison, and must always yield the same result: % for example, random numbers must not be used within this string. % \end{function} % % \begin{function}[EXP, pTF, added = 2021-05-17]{\str_compare:nNn, \str_compare:eNe} % \begin{syntax} % \cs{str_compare_p:nNn} \Arg{tl_1} \meta{relation} \Arg{tl_2} % \cs{str_compare:nNnTF} \Arg{tl_1} \meta{relation} \Arg{tl_2} \Arg{true code} \Arg{false code} % \end{syntax} % Compares the two \meta{token lists} on a character by character % basis (namely after converting them to strings) in a lexicographic % order according to the character codes of the characters. The % \meta{relation} can be |<|, |=|, or~|>| and the test is % \texttt{true} under the following conditions: % \begin{itemize} % \item for |<|, if the first string is earlier than the second in lexicographic order; % \item for |=|, if the two strings have exactly the same characters; % \item for |>|, if the first string is later than the second in lexicographic order. % \end{itemize} % Thus for example the following is logically \texttt{true}: % \begin{verbatim} % \str_compare_p:nNn { ab } < { abc } % \end{verbatim} % \begin{texnote} % This is a wrapper around the \TeX{} primitive % \cs[index=pdfstrcmp]{(pdf)strcmp}. It is meant for programming % and not for sorting textual contents, as it simply considers % character codes and not more elaborate considerations of grapheme % clusters, locale, etc. 
% \end{texnote} % \end{function} % % \section{Mapping over strings} % % All mappings are done at the current group level, \emph{i.e.}~any % local assignments made by the \meta{function} or \meta{code} discussed % below remain in effect after the loop. % % \begin{function}[added = 2017-11-14, rEXP] % {\str_map_function:nN, \str_map_function:NN, \str_map_function:cN} % \begin{syntax} % \cs{str_map_function:nN} \Arg{token list} \meta{function} % \cs{str_map_function:NN} \meta{str~var} \meta{function} % \end{syntax} % Converts the \meta{token list} to a \meta{string} then % applies \meta{function} to every \meta{character} in the % \meta{string} including spaces. % \end{function} % % \begin{function}[added = 2017-11-14] % {\str_map_inline:nn, \str_map_inline:Nn, \str_map_inline:cn} % \begin{syntax} % \cs{str_map_inline:nn} \Arg{token list} \Arg{inline function} % \cs{str_map_inline:Nn} \meta{str~var} \Arg{inline function} % \end{syntax} % Converts the \meta{token list} to a \meta{string} then % applies the \meta{inline function} to every \meta{character} in the % \meta{string} including spaces. % The \meta{inline function} should consist of code which % receives the \meta{character} as |#1|. % \end{function} % % \begin{function}[rEXP, added = 2021-05-05] % {\str_map_tokens:nn, \str_map_tokens:Nn, \str_map_tokens:cn} % \begin{syntax} % \cs{str_map_tokens:nn} \Arg{token list} \Arg{code} % \cs{str_map_tokens:Nn} \meta{str~var} \Arg{code} % \end{syntax} % Converts the \meta{token list} to a \meta{string} then applies % \meta{code} to every \meta{character} in the \meta{string} including % spaces. The \meta{code} receives each character as a trailing brace % group. This is equivalent to \cs{str_map_function:nN} if the % \meta{code} consists of a single function. 
% \end{function} % % \begin{function}[added = 2017-11-14] % {\str_map_variable:nNn, \str_map_variable:NNn, \str_map_variable:cNn} % \begin{syntax} % \cs{str_map_variable:nNn} \Arg{token list} \meta{variable} \Arg{code} % \cs{str_map_variable:NNn} \meta{str~var} \meta{variable} \Arg{code} % \end{syntax} % Converts the \meta{token list} to a \meta{string} then stores each % \meta{character} in the \meta{string} (including spaces) in turn in % the (string or token list) \meta{variable} and applies the % \meta{code}. The \meta{code} will usually make use of the % \meta{variable}, but this is not enforced. The assignments to the % \meta{variable} are local. Its value after the loop is the last % \meta{character} in the \meta{string}, or its original value if the % \meta{string} is empty. See also \cs{str_map_inline:Nn}. % \end{function} % % \begin{function}[added = 2017-10-08, rEXP]{\str_map_break:} % \begin{syntax} % \cs{str_map_break:} % \end{syntax} % Used to terminate a \cs[no-index]{str_map_\ldots} function before all % characters in the \meta{string} have been processed. This % normally takes place within a conditional statement, for example % \begin{verbatim} % \str_map_inline:Nn \l_my_str % { % \str_if_eq:nnT { #1 } { bingo } { \str_map_break: } % % Do something useful % } % \end{verbatim} % See also \cs{str_map_break:n}. % Use outside of a \cs[no-index]{str_map_\ldots} scenario leads to low % level \TeX{} errors. % \begin{texnote} % When the mapping is broken, additional tokens may be inserted % before continuing with the % code that follows the loop. % This depends on the design of the mapping function. % \end{texnote} % \end{function} % % \begin{function}[added = 2017-10-08, rEXP]{\str_map_break:n} % \begin{syntax} % \cs{str_map_break:n} \Arg{code} % \end{syntax} % Used to terminate a \cs[no-index]{str_map_\ldots} function before all % characters in the \meta{string} have been processed, inserting % the \meta{code} after the mapping has ended. 
This % normally takes place within a conditional statement, for example % \begin{verbatim} % \str_map_inline:Nn \l_my_str % { % \str_if_eq:nnT { #1 } { bingo } % { \str_map_break:n { <code> } } % % Do something useful % } % \end{verbatim} % Use outside of a \cs[no-index]{str_map_\ldots} scenario leads to low % level \TeX{} errors. % \begin{texnote} % When the mapping is broken, additional tokens may be inserted % before the \meta{code} is % inserted into the input stream. % This depends on the design of the mapping function. % \end{texnote} % \end{function} % % \section{Working with the content of strings} % % \begin{function}[EXP, added = 2015-09-18]{\str_use:N, \str_use:c} % \begin{syntax} % \cs{str_use:N} \meta{str~var} % \end{syntax} % Recovers the content of a \meta{str~var} and places it % directly in the input stream. An error is raised if the variable % does not exist or if it is invalid. Note that it is possible to use % a \meta{str} directly without an accessor function. % \end{function} % % \begin{function}[EXP, added = 2015-09-18] % {\str_count:N, \str_count:c, \str_count:n, \str_count_ignore_spaces:n} % \begin{syntax} % \cs{str_count:n} \Arg{token list} % \end{syntax} % Leaves in the input stream the number of characters in the string % representation of \meta{token list}, as an integer denotation. The % functions differ in their treatment of spaces. In the case of % \cs{str_count:N} and \cs{str_count:n}, all characters including % spaces are counted. The \cs{str_count_ignore_spaces:n} function % leaves the number of non-space characters in the input stream. % \end{function} % % \begin{function}[EXP, added = 2015-09-18] % {\str_count_spaces:N, \str_count_spaces:c, \str_count_spaces:n} % \begin{syntax} % \cs{str_count_spaces:n} \Arg{token list} % \end{syntax} % Leaves in the input stream the number of space characters in the % string representation of \meta{token list}, as an integer % denotation. 
Of course, this function has no \texttt{_ignore_spaces} % variant. % \end{function} % % \begin{function}[EXP, added = 2015-09-18] % {\str_head:N, \str_head:c, \str_head:n, \str_head_ignore_spaces:n} % \begin{syntax} % \cs{str_head:n} \Arg{token list} % \end{syntax} % Converts the \meta{token list} into a \meta{string}. The first % character in the \meta{string} is then left in the input stream, % with category code \enquote{other}. The functions differ if the % first character is a space: \cs{str_head:N} and \cs{str_head:n} % return a space token with category code~$10$ (blank space), while % the \cs{str_head_ignore_spaces:n} function ignores this space % character and leaves the first non-space character in the input % stream. If the \meta{string} is empty (or only contains spaces in % the case of the \texttt{_ignore_spaces} function), then nothing is % left on the input stream. % \end{function} % % \begin{function}[EXP, added = 2015-09-18] % {\str_tail:N, \str_tail:c, \str_tail:n, \str_tail_ignore_spaces:n} % \begin{syntax} % \cs{str_tail:n} \Arg{token list} % \end{syntax} % Converts the \meta{token list} to a \meta{string}, removes the first % character, and leaves the remaining characters (if any) in the input % stream, with category codes $12$ and $10$ (for spaces). The % functions differ in the case where the first character is a space: % \cs{str_tail:N} and \cs{str_tail:n} only trim that space, while % \cs{str_tail_ignore_spaces:n} removes the first non-space character % and any space before it. If the \meta{token list} is empty (or % blank in the case of the \texttt{_ignore_spaces} variant), then % nothing is left on the input stream. 
% \end{function} % % \begin{function}[EXP, added = 2015-09-18] % {\str_item:Nn, \str_item:cn, \str_item:nn, \str_item_ignore_spaces:nn} % \begin{syntax} % \cs{str_item:nn} \Arg{token list} \Arg{integer expression} % \end{syntax} % Converts the \meta{token list} to a \meta{string}, and leaves in the % input stream the character in position \meta{integer expression} of % the \meta{string}, starting at $1$ for the first (left-most) % character. In the case of \cs{str_item:Nn} and \cs{str_item:nn}, % all characters including spaces are taken into account. The % \cs{str_item_ignore_spaces:nn} function skips spaces when counting % characters. If the \meta{integer expression} is negative, % characters are counted from the end of the \meta{string}. Hence, % $-1$ is the right-most character, \emph{etc.} % \end{function} % % \begin{function}[EXP, added = 2015-09-18] % { % \str_range:Nnn, \str_range:cnn, \str_range:nnn, % \str_range_ignore_spaces:nnn % } % \begin{syntax} % \cs{str_range:nnn} \Arg{token list} \Arg{start index} \Arg{end index} % \end{syntax} % Converts the \meta{token list} to a \meta{string}, and leaves in the % input stream the characters from the \meta{start index} to the % \meta{end index} inclusive. Spaces are preserved and counted as items % (contrast this with \cs{tl_range:nnn} where spaces are not counted as % items and are possibly discarded from the output). % % Here \meta{start index} and \meta{end index} should be integer denotations. % For describing in detail the functions' behavior, let $m$ and $n$ be the start % and end index respectively. If either is $0$, the result is empty. A positive % index means `start counting from the left end', a negative index means % `start counting from the right end'. Let $l$ be the count of the token list. % % The \emph{actual start point} is determined as $M=m$ if~$m>0$ and as $M=l+m+1$ % if~$m<0$. Similarly the \emph{actual end point} is $N=n$ if~$n>0$ and $N=l+n+1$ % if~$n<0$. If $M>N$, the result is empty. 
Otherwise it consists of all items from % position $M$ to position $N$ inclusive; for the purpose of this rule, we can % imagine that the token list extends at infinity on either side, with void items % at positions $s$ for $s\le0$ or $s>l$. % For instance, % \begin{verbatim} % \iow_term:e { \str_range:nnn { abcdef } { 2 } { 5 } } % \iow_term:e { \str_range:nnn { abcdef } { -4 } { -1 } } % \iow_term:e { \str_range:nnn { abcdef } { -2 } { -1 } } % \iow_term:e { \str_range:nnn { abcdef } { 0 } { -1 } } % \end{verbatim} % prints \texttt{bcde}, \texttt{cdef}, \texttt{ef}, and an empty % line to the terminal. The \meta{start index} must always be smaller than % or equal to the \meta{end index}: if this is not the case then no output % is generated. Thus % \begin{verbatim} % \iow_term:e { \str_range:nnn { abcdef } { 5 } { 2 } } % \iow_term:e { \str_range:nnn { abcdef } { -1 } { -4 } } % \end{verbatim} % both yield empty strings. % \end{function} % % ^^A If this stays in the same {function} environment, we get a really % ^^A awful page break. Perhaps we should add a way to allow a page break % ^^A in a function environment... % The behavior of \cs{str_range_ignore_spaces:nnn} is similar, but spaces % are removed before starting the job. 
The input % \begin{verbatim} % \iow_term:e { \str_range:nnn { abcdefg } { 2 } { 5 } } % \iow_term:e { \str_range:nnn { abcdefg } { 2 } { -3 } } % \iow_term:e { \str_range:nnn { abcdefg } { -6 } { 5 } } % \iow_term:e { \str_range:nnn { abcdefg } { -6 } { -3 } } % % \iow_term:e { \str_range:nnn { abc~efg } { 2 } { 5 } } % \iow_term:e { \str_range:nnn { abc~efg } { 2 } { -3 } } % \iow_term:e { \str_range:nnn { abc~efg } { -6 } { 5 } } % \iow_term:e { \str_range:nnn { abc~efg } { -6 } { -3 } } % % \iow_term:e { \str_range_ignore_spaces:nnn { abcdefg } { 2 } { 5 } } % \iow_term:e { \str_range_ignore_spaces:nnn { abcdefg } { 2 } { -3 } } % \iow_term:e { \str_range_ignore_spaces:nnn { abcdefg } { -6 } { 5 } } % \iow_term:e { \str_range_ignore_spaces:nnn { abcdefg } { -6 } { -3 } } % % \iow_term:e { \str_range_ignore_spaces:nnn { abcd~efg } { 2 } { 5 } } % \iow_term:e { \str_range_ignore_spaces:nnn { abcd~efg } { 2 } { -3 } } % \iow_term:e { \str_range_ignore_spaces:nnn { abcd~efg } { -6 } { 5 } } % \iow_term:e { \str_range_ignore_spaces:nnn { abcd~efg } { -6 } { -3 } } % \end{verbatim} % will print four instances of |bcde|, four instances of |bc e| and eight % instances of |bcde|. % ^^A\end{function} % % \section{Modifying string variables} % % \begin{function}[added = 2017-10-08] % { % \str_replace_once:Nnn, \str_replace_once:cnn, % \str_greplace_once:Nnn, \str_greplace_once:cnn % } % \begin{syntax} % \cs{str_replace_once:Nnn} \meta{str~var} \Arg{old} \Arg{new} % \end{syntax} % Converts the \meta{old} and \meta{new} token lists to strings, then % replaces the first (leftmost) occurrence of \meta{old string} in the % \meta{str~var} with \meta{new string}. 
% \end{function} % % \begin{function}[added = 2017-10-08] % { % \str_replace_all:Nnn, \str_replace_all:cnn, % \str_greplace_all:Nnn, \str_greplace_all:cnn % } % \begin{syntax} % \cs{str_replace_all:Nnn} \meta{str~var} \Arg{old} \Arg{new} % \end{syntax} % Converts the \meta{old} and \meta{new} token lists to strings, then % replaces all occurrences of \meta{old string} in the % \meta{str~var} with \meta{new string}. % As this function % operates from left to right, the pattern \meta{old string} % may remain after the replacement (see \cs{str_remove_all:Nn} % for an example). % \end{function} % % \begin{function}[added = 2017-10-08] % { % \str_remove_once:Nn, \str_remove_once:cn, % \str_gremove_once:Nn, \str_gremove_once:cn % } % \begin{syntax} % \cs{str_remove_once:Nn} \meta{str~var} \Arg{token list} % \end{syntax} % Converts the \meta{token list} to a \meta{string} then % removes the first (leftmost) occurrence of \meta{string} from the % \meta{str~var}. % \end{function} % % \begin{function}[added = 2017-10-08] % { % \str_remove_all:Nn, \str_remove_all:cn, % \str_gremove_all:Nn, \str_gremove_all:cn % } % \begin{syntax} % \cs{str_remove_all:Nn} \meta{str~var} \Arg{token list} % \end{syntax} % Converts the \meta{token list} to a \meta{string} then % removes all occurrences of \meta{string} from the % \meta{str~var}. % As this function % operates from left to right, the pattern \meta{string} % may remain after the removal, for instance, % \begin{quote} % \cs{str_set:Nn} \cs{l_tmpa_str} |{abbccd}| % \cs{str_remove_all:Nn} \cs{l_tmpa_str} |{bc}| % \end{quote} % results in \cs{l_tmpa_str} containing \texttt{abcd}. 
% \end{function} % % \section{String manipulation} % % \begin{function}[EXP, added = 2019-11-26] % { % \str_lowercase:n, \str_lowercase:f, % \str_uppercase:n, \str_uppercase:f % } % \begin{syntax} % \cs{str_lowercase:n} \Arg{tokens} % \cs{str_uppercase:n} \Arg{tokens} % \end{syntax} % Converts the input \meta{tokens} to their string representation, as % described for \cs{tl_to_str:n}, and then to the lower or upper % case representation using a one-to-one mapping as described by the % Unicode Consortium file |UnicodeData.txt|. % % These functions are intended for case changing programmatic data in % places where upper/lower case distinctions are meaningful. One example % would be automatically generating a function name from user input where % some case changing is needed. In this situation the input is programmatic, % not textual, case does have meaning and a language-independent one-to-one % mapping is appropriate. For example % \begin{verbatim} % \cs_new_protected:Npn \myfunc:nn #1#2 % { % \cs_set_protected:cpn % { % user % \str_uppercase:f { \tl_head:n {#1} } % \str_lowercase:f { \tl_tail:n {#1} } % } % { #2 } % } % \end{verbatim} % would be used to generate a function with an auto-generated name consisting % of \texttt{user} followed by the upper case equivalent of the first character % of the supplied name and the lower case equivalent of the rest of the name. % % These functions should \emph{not} be used for % \begin{itemize} % \item Caseless comparisons: use \cs{str_casefold:n} for this % situation (case folding is distinct from lower casing). % \item Case changing text for typesetting: see the % \cs[index=text_lowercase:n]{text_lowercase:n(n)}, % \cs[index=text_uppercase:n]{text_uppercase:n(n)} and % \cs[index=text_titlecase_all:n]{text_titlecase_(all|first):n(n)} functions which % correctly deal with context-dependence and other factors appropriate % to text case changing. 
% \end{itemize} % \end{function} % % \begin{function}[EXP, added = 2022-10-16] % {\str_casefold:n, \str_casefold:V} % \begin{syntax} % \cs{str_casefold:n} \Arg{tokens} % \end{syntax} % Converts the input \meta{tokens} to their string representation, as % described for \cs{tl_to_str:n}, and then folds the case of the resulting % \meta{string} to remove case information. The result of this process is % left in the input stream. % % String folding is a process used for material such as identifiers rather % than for \enquote{text}. The folding provided by \cs{str_casefold:n} % follows the mappings provided by the \href{http://www.unicode.org}^^A % {Unicode Consortium}, who % \href{http://www.unicode.org/faq/casemap_charprop.html#2}{state}: % \begin{quote} % Case folding is primarily used for caseless comparison of text, such % as identifiers in a computer program, rather than actual text % transformation. Case folding in Unicode is based on the lowercase % mapping, but includes additional changes to the source text to help make % it language-insensitive and consistent. As a result, case-folded text % should be used solely for internal processing and generally should not be % stored or displayed to the end user. % \end{quote} % The folding approach implemented by \cs{str_casefold:n} follows the % \enquote{full} scheme defined by the Unicode Consortium % (\emph{e.g.}~\SS{} folds to \texttt{SS}). As case-folding is % a language-insensitive process, there is no special treatment of % Turkic input (\emph{i.e.}~\texttt{I} always folds to \texttt{i} and % not to \texttt{\i}). % \end{function} % % \begin{function}[added = 2023-05-19, EXP]{\str_mdfive_hash:n, \str_mdfive_hash:e} % \begin{syntax} % \cs{str_mdfive_hash:n} \Arg{tokens} % \end{syntax} % Expands to the MD5 sum generated from the \meta{tokens}, which is converted % to a \meta{string} as described for \cs{tl_to_str:n}. 
% \end{function} % % \section{Viewing strings} % % \begin{function}[added = 2015-09-18, updated = 2021-04-29] % {\str_show:N, \str_show:c, \str_show:n} % \begin{syntax} % \cs{str_show:N} \meta{str~var} % \end{syntax} % Displays the content of the \meta{str~var} on the terminal. % \end{function} % % \begin{function}[added = 2019-02-15, updated = 2021-04-29] % {\str_log:N, \str_log:c, \str_log:n} % \begin{syntax} % \cs{str_log:N} \meta{str~var} % \end{syntax} % Writes the content of the \meta{str~var} in the log file. % \end{function} % % \section{Constant strings} % % \begin{variable}[added = 2015-09-19, updated = 2020-12-22, module = str] % { % \c_ampersand_str, % \c_atsign_str, % \c_backslash_str, % \c_left_brace_str, % \c_right_brace_str, % \c_circumflex_str, % \c_colon_str, % \c_dollar_str, % \c_hash_str, % \c_percent_str, % \c_tilde_str, % \c_underscore_str, % \c_zero_str % } % Constant strings, containing a single character token, with category % code $12$. % \end{variable} % % \begin{variable}[added = 2023-12-07]{\c_empty_str} % Constant that is always empty. % \end{variable} % % \section{Scratch strings} % % \begin{variable}{\l_tmpa_str, \l_tmpb_str} % Scratch strings for local assignment. These are never used by % the kernel code, and so are safe for use with any \LaTeX3-defined % function. However, they may be overwritten by other non-kernel % code and so should only be used for short-term storage. % \end{variable} % % \begin{variable}{\g_tmpa_str, \g_tmpb_str} % Scratch strings for global assignment. These are never used by % the kernel code, and so are safe for use with any \LaTeX3-defined % function. However, they may be overwritten by other non-kernel % code and so should only be used for short-term storage. 
% \end{variable}
%
% \end{documentation}
%
% \begin{implementation}
%
% \section{\pkg{l3str} implementation}
%
%    \begin{macrocode}
%<*package>
%    \end{macrocode}
%
%    \begin{macrocode}
%<@@=str>
%    \end{macrocode}
%
% \subsection{Internal auxiliaries}
%
% \begin{variable}{\s_@@_mark,\s_@@_stop}
%   Internal scan marks, used in this module as delimiters of
%   \texttt{w}-type arguments.
%    \begin{macrocode}
\scan_new:N \s_@@_mark
\scan_new:N \s_@@_stop
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}[EXP]{
%     \@@_use_none_delimit_by_s_stop:w,
%     \@@_use_i_delimit_by_s_stop:nw
%   }
%   Functions to gobble up to a scan mark.  The first discards everything
%   up to and including \cs{s_@@_stop}; the second leaves its first
%   argument in the input stream and discards the rest, again up to and
%   including \cs{s_@@_stop}.
%    \begin{macrocode}
\cs_new:Npn \@@_use_none_delimit_by_s_stop:w #1 \s_@@_stop
  { }
\cs_new:Npn \@@_use_i_delimit_by_s_stop:nw #1 #2 \s_@@_stop
  {#1}
%    \end{macrocode}
% \end{macro}
%
% \begin{variable}{\q_@@_recursion_tail,\q_@@_recursion_stop}
%   Internal recursion quarks.
%    \begin{macrocode}
\quark_new:N \q_@@_recursion_tail
\quark_new:N \q_@@_recursion_stop
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}[EXP]{
%     \@@_if_recursion_tail_break:NN,
%     \@@_if_recursion_tail_stop_do:Nn
%   }
%   Functions to query recursion quarks.
%    \begin{macrocode}
\__kernel_quark_new_test:N \@@_if_recursion_tail_break:NN
\__kernel_quark_new_test:N \@@_if_recursion_tail_stop_do:Nn
%    \end{macrocode}
% \end{macro}
%
% \subsection{Creating and setting string variables}
%
% \begin{macro}
%   {
%     \str_new:N, \str_new:c,
%     \str_use:N, \str_use:c,
%     \str_clear:N, \str_clear:c,
%     \str_gclear:N,\str_gclear:c,
%     \str_clear_new:N, \str_clear_new:c,
%     \str_gclear_new:N, \str_gclear_new:c
%   }
% \begin{macro}
%   {
%     \str_set_eq:NN, \str_set_eq:cN, \str_set_eq:Nc, \str_set_eq:cc,
%     \str_gset_eq:NN, \str_gset_eq:cN, \str_gset_eq:Nc, \str_gset_eq:cc
%   }
% \begin{macro}
%   {\str_concat:NNN, \str_concat:ccc, \str_gconcat:NNN, \str_gconcat:ccc}
%   A string is simply a token list. The full mapping system isn't set up
%   yet so do things by hand.
%   The temporary function \cs{@@_tmp:n} takes one function stem at a
%   time, copies the corresponding \pkg{l3tl} |N|-type function under its
%   \texttt{str} name, generates the |c|-type variant, and then calls
%   itself on the next stem.  The blank argument at the end of the list
%   stops the loop.  The temporary definition is local to the group.
%    \begin{macrocode}
\group_begin:
  \cs_set_protected:Npn \@@_tmp:n #1
    {
      \tl_if_blank:nF {#1}
        {
          \cs_new_eq:cc { str_ #1 :N } { tl_ #1 :N }
          \exp_args:Nc \cs_generate_variant:Nn { str_ #1 :N } { c }
          \@@_tmp:n
        }
    }
  \@@_tmp:n
    { new } { use }
    { clear } { gclear }
    { clear_new } { gclear_new }
    { }
\group_end:
\cs_new_eq:NN \str_set_eq:NN  \tl_set_eq:NN
\cs_new_eq:NN \str_gset_eq:NN \tl_gset_eq:NN
\cs_generate_variant:Nn \str_set_eq:NN  { c , Nc , cc }
\cs_generate_variant:Nn \str_gset_eq:NN { c , Nc , cc }
\cs_new_eq:NN \str_concat:NNN  \tl_concat:NNN
\cs_new_eq:NN \str_gconcat:NNN \tl_gconcat:NNN
\cs_generate_variant:Nn \str_concat:NNN  { ccc }
\cs_generate_variant:Nn \str_gconcat:NNN { ccc }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}
%   {
%     \str_set:Nn, \str_set:NV, \str_set:Ne, \str_set:Nx,
%     \str_set:cn, \str_set:cV, \str_set:ce, \str_set:cx,
%     \str_gset:Nn, \str_gset:NV, \str_gset:Ne, \str_gset:Nx,
%     \str_gset:cn, \str_gset:cV, \str_gset:ce, \str_gset:cx,
%     \str_const:Nn, \str_const:NV, \str_const:Ne, \str_const:Nx,
%     \str_const:cn, \str_const:cV, \str_const:ce, \str_const:cx,
%     \str_put_left:Nn, \str_put_left:NV, \str_put_left:Ne, \str_put_left:Nx,
%     \str_put_left:cn, \str_put_left:cV, \str_put_left:ce, \str_put_left:cx,
%     \str_gput_left:Nn, \str_gput_left:NV, \str_gput_left:Ne, \str_gput_left:Nx,
%     \str_gput_left:cn, \str_gput_left:cV, \str_gput_left:ce, \str_gput_left:cx,
%     \str_put_right:Nn, \str_put_right:NV, \str_put_right:Ne, \str_put_right:Nx,
%     \str_put_right:cn, \str_put_right:cV, \str_put_right:ce, \str_put_right:cx,
%     \str_gput_right:Nn, \str_gput_right:NV, \str_gput_right:Ne, \str_gput_right:Nx,
%     \str_gput_right:cn, \str_gput_right:cV, \str_gput_right:ce, \str_gput_right:cx
%   }
%   Similar to corresponding \pkg{l3tl} base functions, except that
%   \cs{__kernel_exp_not:w} is replaced with \cs{__kernel_tl_to_str:w}.
%   Just like token lists, string constants use \cs{cs_gset_nopar:Npe}
%   instead of \cs{__kernel_tl_gset:Nx} so that the scope checking for
%   |c| is applied when \pkg{l3debug} is used.
%   To maintain backward compatibility, in
%   \cs[index=str_put_left:Nn]{str_(g)put_left:Nn} and
%   \cs[index=str_put_right:Nn]{str_(g)put_right:Nn},
%   contents of string variables are wrapped in \cs{__kernel_exp_not:w}
%   to prevent further expansion.
%   All seven base functions are created with \cs{cs_new_protected:Npn},
%   so that a clash with an existing definition is reported instead of
%   being silently overwritten.  In each of them |#1| is the string
%   variable and |#2| the token list to convert to a string.
%    \begin{macrocode}
\cs_new_protected:Npn \str_set:Nn #1#2
  { \__kernel_tl_set:Nx #1 { \__kernel_tl_to_str:w {#2} } }
\cs_new_protected:Npn \str_gset:Nn #1#2
  { \__kernel_tl_gset:Nx #1 { \__kernel_tl_to_str:w {#2} } }
\cs_new_protected:Npn \str_const:Nn #1#2
  {
    \__kernel_chk_if_free_cs:N #1
    \cs_gset_nopar:Npe #1 { \__kernel_tl_to_str:w {#2} }
  }
\cs_new_protected:Npn \str_put_left:Nn #1#2
  {
    \__kernel_tl_set:Nx #1
      { \__kernel_tl_to_str:w {#2} \__kernel_exp_not:w \exp_after:wN {#1} }
  }
\cs_new_protected:Npn \str_gput_left:Nn #1#2
  {
    \__kernel_tl_gset:Nx #1
      { \__kernel_tl_to_str:w {#2} \__kernel_exp_not:w \exp_after:wN {#1} }
  }
\cs_new_protected:Npn \str_put_right:Nn #1#2
  {
    \__kernel_tl_set:Nx #1
      { \__kernel_exp_not:w \exp_after:wN {#1} \__kernel_tl_to_str:w {#2} }
  }
\cs_new_protected:Npn \str_gput_right:Nn #1#2
  {
    \__kernel_tl_gset:Nx #1
      { \__kernel_exp_not:w \exp_after:wN {#1} \__kernel_tl_to_str:w {#2} }
  }
\cs_generate_variant:Nn \str_set:Nn        { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_gset:Nn       { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_const:Nn      { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_put_left:Nn   { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_gput_left:Nn  { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_put_right:Nn  { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_gput_right:Nn { NV , Ne , Nx , c , cV , ce , cx }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Modifying string variables}
%
% \begin{macro}
%   {
%     \str_replace_all:Nnn,
%     \str_replace_all:cnn,
%     \str_greplace_all:Nnn, \str_greplace_all:cnn,
%     \str_replace_once:Nnn, \str_replace_once:cnn,
%     \str_greplace_once:Nnn, \str_greplace_once:cnn
%   }
% \begin{macro}{\@@_replace:NNNnn}
% \begin{macro}{\@@_replace_aux:NNNnnn}
% \begin{macro}{\@@_replace_next:w}
%   Start by applying \cs{tl_to_str:n} to convert the old and new token
%   lists to strings, and also apply \cs{tl_to_str:N} to avoid any
%   issues if we are fed a token list variable.  Then the code is a much
%   simplified version of the token list code because neither the
%   delimiter nor the replacement can contain macro parameters or
%   braces.  The delimiter \cs{s_@@_mark} cannot appear in the string to
%   edit so it is used in all cases.  Some |e|-expansion is unnecessary.
%   There is no need to avoid losing braces nor to protect against
%   expansion.  The ending code is much simplified and does not need to
%   hide in braces.
%
%   The arguments of \cs{@@_replace:NNNnn} are: |#1|, the function
%   inserted after each replacement (\cs{prg_do_nothing:} to stop after
%   the first one, \cs{@@_replace_next:w} to carry on); |#2|, the local
%   or global assignment function; |#3|, the string variable; |#4|, the
%   search pattern; and |#5|, the replacement.  An empty search pattern
%   is rejected with an error.  Since \cs{msg_error:nne} expands its last
%   argument, the replacement is passed to the message through
%   \cs{tl_to_str:n} so that user tokens are shown rather than expanded.
%    \begin{macrocode}
\cs_new_protected:Npn \str_replace_once:Nnn
  { \@@_replace:NNNnn \prg_do_nothing: \__kernel_tl_set:Nx }
\cs_new_protected:Npn \str_greplace_once:Nnn
  { \@@_replace:NNNnn \prg_do_nothing: \__kernel_tl_gset:Nx }
\cs_new_protected:Npn \str_replace_all:Nnn
  { \@@_replace:NNNnn \@@_replace_next:w \__kernel_tl_set:Nx }
\cs_new_protected:Npn \str_greplace_all:Nnn
  { \@@_replace:NNNnn \@@_replace_next:w \__kernel_tl_gset:Nx }
\cs_generate_variant:Nn \str_replace_once:Nnn  { c }
\cs_generate_variant:Nn \str_greplace_once:Nnn { c }
\cs_generate_variant:Nn \str_replace_all:Nnn   { c }
\cs_generate_variant:Nn \str_greplace_all:Nnn  { c }
\cs_new_protected:Npn \@@_replace:NNNnn #1#2#3#4#5
  {
    \tl_if_empty:nTF {#4}
      {
        \msg_error:nne { kernel } { empty-search-pattern }
          { \tl_to_str:n {#5} }
      }
      {
        \use:e
          {
            \exp_not:n { \@@_replace_aux:NNNnnn #1 #2 #3 }
              { \tl_to_str:N #3 }
              { \tl_to_str:n {#4} } { \tl_to_str:n {#5} }
          }
      }
  }
\cs_new_protected:Npn \@@_replace_aux:NNNnnn #1#2#3#4#5#6
  {
    \cs_set:Npn \@@_replace_next:w ##1 #5 { ##1 #6 #1 }
    #2 #3
      {
        \@@_replace_next:w
        #4
        \@@_use_none_delimit_by_s_stop:w
        #5
        \s_@@_stop
      }
  }
\cs_new_eq:NN \@@_replace_next:w ?
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\str_remove_once:Nn, \str_remove_once:cn}
% \begin{macro}{\str_gremove_once:Nn, \str_gremove_once:cn}
%   Removal is just a special case of replacement.
%    \begin{macrocode}
\cs_new_protected:Npn \str_remove_once:Nn #1#2
  { \str_replace_once:Nnn #1 {#2} { } }
\cs_new_protected:Npn \str_gremove_once:Nn #1#2
  { \str_greplace_once:Nnn #1 {#2} { } }
\cs_generate_variant:Nn \str_remove_once:Nn  { c }
\cs_generate_variant:Nn \str_gremove_once:Nn { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\str_remove_all:Nn, \str_remove_all:cn}
% \begin{macro}{\str_gremove_all:Nn, \str_gremove_all:cn}
%   Removal is just a special case of replacement.
%    \begin{macrocode}
\cs_new_protected:Npn \str_remove_all:Nn #1#2
  { \str_replace_all:Nnn #1 {#2} { } }
\cs_new_protected:Npn \str_gremove_all:Nn #1#2
  { \str_greplace_all:Nnn #1 {#2} { } }
\cs_generate_variant:Nn \str_remove_all:Nn  { c }
\cs_generate_variant:Nn \str_gremove_all:Nn { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{String comparisons}
%
% \begin{macro}[pTF, EXP]
%   {
%     \str_if_empty:N, \str_if_empty:c, \str_if_empty:n,
%     \str_if_exist:N, \str_if_exist:c
%   }
%   More copy-paste!
%    \begin{macrocode}
\prg_new_eq_conditional:NNn \str_if_exist:N \tl_if_exist:N
  { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_exist:c \tl_if_exist:c
  { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_empty:N \tl_if_empty:N
  { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_empty:c \tl_if_empty:c
  { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_empty:n \tl_if_empty:n
  { p , T , F , TF }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_if_eq:nn}
%   String comparisons rely on the primitive \tn[index=pdfstrcmp]{(pdf)strcmp},
%   so we define a new name for it.
%    \begin{macrocode}
\cs_new_eq:NN \@@_if_eq:nn \tex_strcmp:D
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF, EXP]{\str_compare:nNn, \str_compare:eNe}
%   Simply rely on \cs{@@_if_eq:nn}, which expands to |-1|, |0|
%   or~|1|, and compare that number with zero using the relation~|#2|.
%   The |eNe| version is created directly because it is more efficient;
%   it differs from the |nNn| version only in not wrapping its two string
%   arguments in \cs{exp_not:n}.
%    \begin{macrocode}
\prg_new_conditional:Npnn \str_compare:nNn #1#2#3 { p , T , F , TF }
  {
    \if_int_compare:w
        \@@_if_eq:nn { \exp_not:n {#1} } { \exp_not:n {#3} }
        #2 \c_zero_int
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
\prg_new_conditional:Npnn \str_compare:eNe #1#2#3 { p , T , F , TF }
  {
    \if_int_compare:w
        \@@_if_eq:nn {#1} {#3}
        #2 \c_zero_int
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF, EXP]
%   {
%     \str_if_eq:nn, \str_if_eq:Vn, \str_if_eq:on, \str_if_eq:nV,
%     \str_if_eq:no, \str_if_eq:VV,
%     \str_if_eq:ee
%   }
%   Modern engines provide a direct way of comparing two token lists,
%   but returning a number. This set of conditionals therefore makes life
%   a bit clearer. The \texttt{nn} and \texttt{ee} versions are created
%   directly as this is most efficient. Since \cs{@@_if_eq:nn} will expand to
%   |0| as an explicit character with category 12 if the two lists match (and
%   either |-1| or |1| if they don't) we can use \cs{if:w} here which is faster
%   than using \cs{if_int_compare:w}.
%    \begin{macrocode}
\prg_new_conditional:Npnn \str_if_eq:nn #1#2 { p , T , F , TF }
  {
    \if:w 0 \@@_if_eq:nn { \exp_not:n {#1} } { \exp_not:n {#2} }
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
\prg_generate_conditional_variant:Nnn \str_if_eq:nn
  { V , v , o , nV , no , VV , nv }
  { p , T , F , TF }
\prg_new_conditional:Npnn \str_if_eq:ee #1#2 { p , T , F , TF }
  {
    \if:w 0 \@@_if_eq:nn {#1} {#2}
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP, pTF]
%   {\str_if_eq:NN, \str_if_eq:Nc, \str_if_eq:cN, \str_if_eq:cc}
%   Note that \cs{str_if_eq:NNTF} is different from
%   \cs{tl_if_eq:NNTF} because it needs to ignore category codes.
%   Both variables are therefore passed through \cs{tl_to_str:N} before
%   being compared.
%    \begin{macrocode}
\prg_new_conditional:Npnn \str_if_eq:NN #1#2 { p , TF , T , F }
  {
    \if:w 0 \@@_if_eq:nn { \tl_to_str:N #1 } { \tl_to_str:N #2 }
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
\prg_generate_conditional_variant:Nnn \str_if_eq:NN
  { c , Nc , cc }
  { T , F , TF , p }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[TF]{\str_if_in:Nn, \str_if_in:cn, \str_if_in:nn}
%   Everything here needs to be detokenized but beyond that it is a
%   simple token list test.  It would be faster to fine-tune the |T|,
%   |F|, |TF| variants by calling the appropriate variant of
%   \cs{tl_if_in:nnTF} directly but that takes more code.
%   The \cs{use:e} step converts both arguments to strings before
%   \cs{tl_if_in:nnTF} is applied to them.
%    \begin{macrocode}
\prg_new_protected_conditional:Npnn \str_if_in:Nn #1#2 { T , F , TF }
  {
    \use:e
      { \tl_if_in:nnTF { \tl_to_str:N #1 } { \tl_to_str:n {#2} } }
      { \prg_return_true: }
      { \prg_return_false: }
  }
\prg_generate_conditional_variant:Nnn \str_if_in:Nn
  { c } { T , F , TF }
\prg_new_protected_conditional:Npnn \str_if_in:nn #1#2 { T , F , TF }
  {
    \use:e
      { \tl_if_in:nnTF { \tl_to_str:n {#1} } { \tl_to_str:n {#2} } }
      { \prg_return_true: }
      { \prg_return_false: }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP, noTF]
%   {
%     \str_case:nn, \str_case:Vn, \str_case:Nn, \str_case:on, \str_case:en, \str_case:nV, \str_case:nv,
%     \str_case_e:nn, \str_case_e:en
%   }
% \begin{macro}[EXP]{\@@_case:nnTF, \@@_case_e:nnTF}
% \begin{macro}[EXP]
%   {\@@_case:nw, \@@_case_e:nw, \@@_case_end:nw}
%   The aim here is to allow the case statement to be evaluated
%   using a known number of expansion steps (two), and without
%   needing to use an explicit \enquote{end of recursion} marker.
%   That is achieved by using the test input as the final case,
%   as this is always true. The trick is then to tidy up
%   the output such that the appropriate case code plus either
%   the \texttt{true} or \texttt{false} branch code is inserted.
%    \begin{macrocode}
\cs_new:Npn \str_case:nn #1#2
  {
    \exp:w
    \@@_case:nnTF {#1} {#2} { } { }
  }
\cs_new:Npn \str_case:nnT #1#2#3
  {
    \exp:w
    \@@_case:nnTF {#1} {#2} {#3} { }
  }
\cs_new:Npn \str_case:nnF #1#2
  {
    \exp:w
    \@@_case:nnTF {#1} {#2} { }
  }
\cs_new:Npn \str_case:nnTF #1#2
  {
    \exp:w
    \@@_case:nnTF {#1} {#2}
  }
\cs_new:Npn \@@_case:nnTF #1#2#3#4
  {
    \@@_case:nw {#1} #2 {#1} { }
    \s_@@_mark {#3} \s_@@_mark {#4} \s_@@_stop
  }
\cs_generate_variant:Nn \str_case:nn { V , o , e , nV , nv }
\prg_generate_conditional_variant:Nnn \str_case:nn
  { V , o , e , nV , nv }
  { T , F , TF }
\cs_new_eq:NN \str_case:Nn   \str_case:Vn
\cs_new_eq:NN \str_case:NnT  \str_case:VnT
\cs_new_eq:NN \str_case:NnF  \str_case:VnF
\cs_new_eq:NN \str_case:NnTF \str_case:VnTF
\cs_new:Npn \@@_case:nw #1#2#3
  {
    \str_if_eq:nnTF {#1} {#2}
      { \@@_case_end:nw {#3} }
      { \@@_case:nw {#1} }
  }
\cs_new:Npn \str_case_e:nn #1#2
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2} { } { }
  }
\cs_new:Npn \str_case_e:nnT #1#2#3
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2} {#3} { }
  }
\cs_new:Npn \str_case_e:nnF #1#2
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2} { }
  }
\cs_new:Npn \str_case_e:nnTF #1#2
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2}
  }
\cs_new:Npn \@@_case_e:nnTF #1#2#3#4
  {
    \@@_case_e:nw {#1} #2 {#1} { }
    \s_@@_mark {#3} \s_@@_mark {#4} \s_@@_stop
  }
\cs_generate_variant:Nn \str_case_e:nn { e }
\prg_generate_conditional_variant:Nnn \str_case_e:nn
  { e }
  { T , F , TF }
\cs_new:Npn \@@_case_e:nw #1#2#3
  {
    \str_if_eq:eeTF {#1} {#2}
      { \@@_case_end:nw {#3} }
      { \@@_case_e:nw {#1} }
  }
%    \end{macrocode}
%   To tidy up the recursion, there are two outcomes. If there was a hit to
%   one of the cases searched for, then |#1| is the code to insert,
%   |#2| is the \emph{next} case to check on and |#3| is all of
%   the rest of the cases code. That means that |#4| is the \texttt{true}
%   branch code, and |#5| tidies up the spare \cs{s_@@_mark} and the
%   \texttt{false} branch. On the other hand, if none of the cases matched
%   then we arrive here using the \enquote{termination} case of comparing
%   the search with itself. That means that |#1| is empty, |#2| is
%   the first \cs{s_@@_mark} and so |#4| is the \texttt{false} code (the
%   \texttt{true} code is mopped up by |#3|).  In both situations
%   \cs{exp_end:} stops the expansion started by \cs{exp:w} in the
%   user-level function.
%    \begin{macrocode}
\cs_new:Npn \@@_case_end:nw #1#2#3 \s_@@_mark #4#5 \s_@@_stop
  { \exp_end: #1 #4 }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Mapping over strings}
%
% \begin{macro}[rEXP]{\str_map_function:NN, \str_map_function:cN}
% \begin{macro}[rEXP]{\str_map_function:nN}
% \begin{macro}{\str_map_inline:Nn, \str_map_inline:cn}
% \begin{macro}{\str_map_inline:nn}
% \begin{macro}{\str_map_variable:NNn, \str_map_variable:cNn}
% \begin{macro}{\str_map_variable:nNn}
% \begin{macro}{\str_map_break:}
% \begin{macro}{\str_map_break:n}
% \begin{macro}[rEXP]{\@@_map_function:w, \@@_map_function:nn}
% \begin{macro}{\@@_map_inline:NN, \@@_map_variable:NnN}
%   The inline and variable mappings are similar to the usual token list
%   mappings but start out by turning the argument to an ``other
%   string''.  Doing the same for the expandable function mapping would
%   require \cs{__kernel_str_to_other:n}, quadratic in the string length.  To deal
%   with spaces in that case, \cs{@@_map_function:w} replaces the
%   following space by a braced space and a further call to itself.
%   These are received by \cs{@@_map_function:nn}, which passes
%   the space to |#1| and calls \cs{@@_map_function:w} to deal with the
%   next space.  The space before the braced space allows to optimize
%   the \cs{q_@@_recursion_tail} test.  Of course we need to include a
%   trailing space (the question mark is needed to avoid losing the
%   space when \TeX{} tokenizes the line).
%   At the cost of about three more auxiliaries this code could get a $9$
%   times speed up by testing only every $9$-th character for whether it
%   is \cs{q_@@_recursion_tail} (also by converting $9$ spaces at a time in
%   the \cs{str_map_function:nN} case).
%
%   For the \texttt{map_variable} functions we use a string assignment
%   to store each character because spaces are made catcode~$12$ before
%   the loop.
%
%   The inline mapping stores the user code in a function numbered by
%   \cs{g__kernel_prg_map_int}; that counter is incremented before the
%   loop and decremented in the break point code.
%    \begin{macrocode}
\cs_new:Npn \str_map_function:nN #1#2
  {
    \exp_after:wN \@@_map_function:w
    \exp_after:wN \@@_map_function:nn
    \exp_after:wN #2
      \__kernel_tl_to_str:w {#1}
      \q_@@_recursion_tail ? ~
    \prg_break_point:Nn \str_map_break: { }
  }
\cs_new:Npn \str_map_function:NN
  { \exp_args:No \str_map_function:nN }
\cs_new:Npn \@@_map_function:w #1 ~
  { #1 { ~ { ~ } \@@_map_function:w } }
\cs_new:Npn \@@_map_function:nn #1#2
  {
    \if_meaning:w \q_@@_recursion_tail #2
      \exp_after:wN \str_map_break:
    \fi:
    #1 #2
    \@@_map_function:nn {#1}
  }
\cs_generate_variant:Nn \str_map_function:NN { c }
\cs_new_protected:Npn \str_map_inline:nn #1#2
  {
    \int_gincr:N \g__kernel_prg_map_int
    \cs_gset_protected:cpn
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w } ##1 {#2}
    \use:e
      {
        \exp_not:N \@@_map_inline:NN
        \exp_not:c { @@_map_ \int_use:N \g__kernel_prg_map_int :w }
        \__kernel_str_to_other_fast:n {#1}
      }
      \q_@@_recursion_tail
    \prg_break_point:Nn \str_map_break:
      { \int_gdecr:N \g__kernel_prg_map_int }
  }
\cs_new_protected:Npn \str_map_inline:Nn
  { \exp_args:No \str_map_inline:nn }
\cs_generate_variant:Nn \str_map_inline:Nn { c }
\cs_new:Npn \@@_map_inline:NN #1#2
  {
    \@@_if_recursion_tail_break:NN #2 \str_map_break:
    \exp_args:No #1 { \token_to_str:N #2 }
    \@@_map_inline:NN #1
  }
\cs_new_protected:Npn \str_map_variable:nNn #1#2#3
  {
    \use:e
      {
        \exp_not:n { \@@_map_variable:NnN #2 {#3} }
        \__kernel_str_to_other_fast:n {#1}
      }
      \q_@@_recursion_tail
    \prg_break_point:Nn \str_map_break: { }
  }
\cs_new_protected:Npn \str_map_variable:NNn
  { \exp_args:No \str_map_variable:nNn }
\cs_new_protected:Npn \@@_map_variable:NnN #1#2#3
  {
    \@@_if_recursion_tail_break:NN #3 \str_map_break:
    \str_set:Nn #1 {#3}
    \use:n {#2}
    \@@_map_variable:NnN #1 {#2}
  }
\cs_generate_variant:Nn \str_map_variable:NNn { c }
\cs_new:Npn \str_map_break:
  { \prg_map_break:Nn \str_map_break: { } }
\cs_new:Npn \str_map_break:n
  { \prg_map_break:Nn \str_map_break: }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}[rEXP]{\str_map_tokens:Nn, \str_map_tokens:cn}
% \begin{macro}[rEXP]{\str_map_tokens:nn}
%   Uses an auxiliary of \cs{str_map_function:NN}.
%    \begin{macrocode}
\cs_new:Npn \str_map_tokens:nn #1#2
  {
    \exp_args:Nno \use:nn
      { \@@_map_function:w \@@_map_function:nn {#2} }
      { \__kernel_tl_to_str:w {#1} }
      \q_@@_recursion_tail ? ~
    \prg_break_point:Nn \str_map_break: { }
  }
\cs_new:Npn \str_map_tokens:Nn
  { \exp_args:No \str_map_tokens:nn }
\cs_generate_variant:Nn \str_map_tokens:Nn { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Accessing specific characters in a string}
%
% \begin{macro}[EXP]{\__kernel_str_to_other:n}
% \begin{macro}[EXP]{\@@_to_other_loop:w, \@@_to_other_end:w}
%   First apply \cs{tl_to_str:n}, then replace all spaces by
%   \enquote{other} spaces, $8$ at a time, storing the converted part of
%   the string between the \cs{s_@@_mark} and \cs{s_@@_stop} markers.  The end
%   is detected when \cs{@@_to_other_loop:w} finds one of the trailing
%   |A|, distinguished from any contents of the initial token list by
%   their category.  Then \cs{@@_to_other_end:w} is called, and finds
%   the result between \cs{s_@@_mark} and the first |A| (well, there is
%   also the need to remove a space).
%   Within the \cs{tex_lowercase:D} argument, |*| is turned into a space
%   character of category code~$12$.
%    \begin{macrocode}
\cs_new:Npn \__kernel_str_to_other:n #1
  {
    \exp_after:wN \@@_to_other_loop:w
      \tl_to_str:n {#1} ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~
      \s_@@_mark \s_@@_stop
  }
\group_begin:
\tex_lccode:D `\* = `\ %
\tex_lccode:D `\A = `\A %
\tex_lowercase:D
  {
    \group_end:
    \cs_new:Npn \@@_to_other_loop:w
        #1 ~ #2 ~ #3 ~ #4 ~ #5 ~ #6 ~ #7 ~ #8 ~ #9 \s_@@_stop
      {
        \if_meaning:w A #8
          \@@_to_other_end:w
        \fi:
        \@@_to_other_loop:w
          #9 #1 * #2 * #3 * #4 * #5 * #6 * #7 * #8 * \s_@@_stop
      }
    \cs_new:Npn \@@_to_other_end:w \fi: #1 \s_@@_mark #2 * A #3 \s_@@_stop
      { \fi: #2 }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[rEXP]{\__kernel_str_to_other_fast:n}
% \begin{macro}[rEXP]{\@@_to_other_fast_loop:w, \@@_to_other_fast_end:w}
%   The difference with \cs{__kernel_str_to_other:n} is that the converted part is
%   left in the input stream, making these commands only
%   restricted-expandable.
%    \begin{macrocode}
\cs_new:Npn \__kernel_str_to_other_fast:n #1
  {
    \exp_after:wN \@@_to_other_fast_loop:w
      \tl_to_str:n {#1} ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~
      \s_@@_stop
  }
\group_begin:
\tex_lccode:D `\* = `\ %
\tex_lccode:D `\A = `\A %
\tex_lowercase:D
  {
    \group_end:
    \cs_new:Npn \@@_to_other_fast_loop:w
        #1 ~ #2 ~ #3 ~ #4 ~ #5 ~ #6 ~ #7 ~ #8 ~ #9 ~
      {
        \if_meaning:w A #9
          \@@_to_other_fast_end:w
        \fi:
        #1 * #2 * #3 * #4 * #5 * #6 * #7 * #8 * #9
        \@@_to_other_fast_loop:w *
      }
    \cs_new:Npn \@@_to_other_fast_end:w #1 * A #2 \s_@@_stop
      {#1}
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {\str_item:Nn, \str_item:cn, \str_item:nn, \str_item_ignore_spaces:nn}
% \begin{macro}[EXP]{\@@_item:nn, \@@_item:w}
%   The \cs{str_item:nn} hands its argument with spaces escaped to
%   \cs{@@_item:nn}, and makes sure to turn the result back into
%   a proper string (with category code~$10$ spaces) eventually.  The
%   \cs{str_item_ignore_spaces:nn} function does not escape spaces,
%   which are thus ignored by \cs{@@_item:nn} since
%   everything else is done with undelimited arguments.
%   Evaluate the \meta{index} argument~|#2| and count characters in
%   the string, passing those two numbers to \cs{@@_item:w} for
%   further analysis.  If the \meta{index} is negative, shift it by
%   the \meta{count} to know how many characters to discard, and if
%   that is still negative give an empty result.  If the \meta{index}
%   is larger than the \meta{count}, give an empty result, and
%   otherwise discard $\meta{index}-1$ characters before returning the
%   following one.  The shift by $-1$ is obtained by inserting an empty
%   brace group before the string in that case: that brace group also
%   covers the case where the \meta{index} is zero.
%    \begin{macrocode}
\cs_new:Npn \str_item:Nn
  { \exp_args:No \str_item:nn }
\cs_generate_variant:Nn \str_item:Nn { c }
\cs_new:Npn \str_item:nn #1#2
  {
    \exp_args:Nf \tl_to_str:n
      {
        \exp_args:Nf \@@_item:nn
          { \__kernel_str_to_other:n {#1} }
          {#2}
      }
  }
\cs_new:Npn \str_item_ignore_spaces:nn #1
  { \exp_args:No \@@_item:nn { \tl_to_str:n {#1} } }
\cs_new:Npn \@@_item:nn #1#2
  {
    \exp_after:wN \@@_item:w
    \int_value:w \int_eval:n {#2} \exp_after:wN ;
    \int_value:w \@@_count:n {#1} ;
    #1
    \s_@@_stop
  }
\cs_new:Npn \@@_item:w #1; #2;
  {
    \int_compare:nNnTF {#1} < 0
      {
        \int_compare:nNnTF {#1} < {-#2}
          { \@@_use_none_delimit_by_s_stop:w }
          {
            \exp_after:wN \@@_use_i_delimit_by_s_stop:nw
            \exp:w
            \exp_after:wN \@@_skip_exp_end:w
            \int_value:w \int_eval:n { #1 + #2 } ;
          }
      }
      {
        \int_compare:nNnTF {#1} > {#2}
          { \@@_use_none_delimit_by_s_stop:w }
          {
            \exp_after:wN \@@_use_i_delimit_by_s_stop:nw
            \exp:w
            \@@_skip_exp_end:w #1 ; { }
          }
      }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_skip_exp_end:w}
% \begin{macro}[EXP]
%   {\@@_skip_loop:wNNNNNNNN, \@@_skip_end:w, \@@_skip_end:NNNNNNNN}
%   Removes |max(#1,0)| characters from the input stream, and then
%   leaves \cs{exp_end:}.  This should be expanded using
%   \cs{exp:w}.  We remove characters $8$ at a time until
%   there are at most $8$ to remove.  Then we do a dirty trick: the
%   \cs{if_case:w} construction leaves between $0$ and $8$ times the
%   \cs{or:} control sequence, and those \cs{or:} become arguments of
%   \cs{@@_skip_end:NNNNNNNN}.  If the number of characters to remove
%   is $6$, say, then there are two \cs{or:} left, and the $8$ arguments
%   of \cs{@@_skip_end:NNNNNNNN} are the two \cs{or:}, and $6$
%   characters from the input stream, exactly what we wanted to
%   remove.  Then close the \cs{if_case:w} conditional with \cs{fi:}, and
%   stop the initial expansion with \cs{exp_end:} (see places where
%   \cs{@@_skip_exp_end:w} is called).
%    \begin{macrocode}
\cs_new:Npn \@@_skip_exp_end:w #1;
  {
    \if_int_compare:w #1 > 8 \exp_stop_f:
      \exp_after:wN \@@_skip_loop:wNNNNNNNN
    \else:
      \exp_after:wN \@@_skip_end:w
      \int_value:w \int_eval:w
    \fi:
    #1 ;
  }
\cs_new:Npn \@@_skip_loop:wNNNNNNNN #1; #2#3#4#5#6#7#8#9
  {
    \exp_after:wN \@@_skip_exp_end:w
    \int_value:w \int_eval:n { #1 - 8 } ;
  }
\cs_new:Npn \@@_skip_end:w #1 ;
  {
    \exp_after:wN \@@_skip_end:NNNNNNNN
    \if_case:w #1 \exp_stop_f:
      \or: \or: \or: \or: \or: \or: \or: \or:
  }
\cs_new:Npn \@@_skip_end:NNNNNNNN #1#2#3#4#5#6#7#8
  { \fi: \exp_end: }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {\str_range:Nnn, \str_range:nnn, \str_range_ignore_spaces:nnn}
% \begin{macro}[EXP]{\@@_range:nnn}
% \begin{macro}[EXP]{\@@_range:w, \@@_range:nnw}
%   Sanitize the string.  Then evaluate the arguments.  At this stage we
%   also decrement the \meta{start index}, since our goal is to know how
%   many characters should be removed.  Then limit the range to be
%   non-negative and at most the length of the string (this avoids
%   needing to check for the end of the string when grabbing
%   characters), shifting negative numbers by the appropriate amount.
%   Afterwards, skip characters, then keep some more, and finally drop
%   the end of the string.
%    \begin{macrocode}
\cs_new:Npn \str_range:Nnn
  { \exp_args:No \str_range:nnn }
\cs_generate_variant:Nn \str_range:Nnn { c }
\cs_new:Npn \str_range:nnn #1#2#3
  {
    \exp_args:Nf \tl_to_str:n
      {
        \exp_args:Nf \@@_range:nnn
          { \__kernel_str_to_other:n {#1} }
          {#2} {#3}
      }
  }
\cs_new:Npn \str_range_ignore_spaces:nnn #1
  { \exp_args:No \@@_range:nnn { \tl_to_str:n {#1} } }
\cs_new:Npn \@@_range:nnn #1#2#3
  {
    \exp_after:wN \@@_range:w
    \int_value:w \@@_count:n {#1} \exp_after:wN ;
    \int_value:w \int_eval:n { (#2) - 1 } \exp_after:wN ;
    \int_value:w \int_eval:n {#3} ;
    #1
    \s_@@_stop
  }
\cs_new:Npn \@@_range:w #1; #2; #3;
  {
    \exp_args:Nf \@@_range:nnw
      { \@@_range_normalize:nn {#2} {#1} }
      { \@@_range_normalize:nn {#3} {#1} }
  }
\cs_new:Npn \@@_range:nnw #1#2
  {
    \exp_after:wN \@@_collect_delimit_by_q_stop:w
    \int_value:w \int_eval:n { #2 - #1 } \exp_after:wN ;
    \exp:w
    \@@_skip_exp_end:w #1 ;
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \begin{macro}[EXP]{\@@_range_normalize:nn}
%   This function converts an \meta{index} argument into an explicit
%   position in the string (a result of $0$ denoting \enquote{out of
%   bounds}).  Expects two explicit integer arguments: the
%   \meta{index} |#1| and the string count~|#2|.  If |#1| is negative,
%   replace it by $|#1| + |#2| + 1$, then limit to the range $[0,
%   |#2|]$.
%    \begin{macrocode}
\cs_new:Npn \@@_range_normalize:nn #1#2
  {
    \int_eval:n
      {
        \if_int_compare:w #1 < \c_zero_int
          \if_int_compare:w #1 < -#2 \exp_stop_f:
            0
          \else:
            #1 + #2 + 1
          \fi:
        \else:
          \if_int_compare:w #1 < #2 \exp_stop_f:
            #1
          \else:
            #2
          \fi:
        \fi:
      }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}[EXP]{\@@_collect_delimit_by_q_stop:w}
% \begin{macro}[EXP]
%   {
%     \@@_collect_loop:wn, \@@_collect_loop:wnNNNNNNN,
%     \@@_collect_end:wn, \@@_collect_end:nnnnnnnnw
%   }
%   Collects |max(#1,0)| characters, and removes everything else until
%   \cs{s_@@_stop}.  This is somewhat similar to \cs{@@_skip_exp_end:w}, but
%   accepts integer expression arguments.  This time we can only grab
%   $7$ characters at a time.  At the end, we use an \cs{if_case:w}
%   trick again, so that the $8$ first arguments of
%   \cs{@@_collect_end:nnnnnnnnw} are some \cs{or:}, followed by an
%   \cs{fi:}, followed by |#1| characters from the input stream.  Simply
%   leaving this in the input stream closes the conditional properly
%   and the \cs{or:} disappear.
%    \begin{macrocode}
\cs_new:Npn \@@_collect_delimit_by_q_stop:w #1;
  { \@@_collect_loop:wn #1 ; { } }
\cs_new:Npn \@@_collect_loop:wn #1 ;
  {
    \if_int_compare:w #1 > 7 \exp_stop_f:
      \exp_after:wN \@@_collect_loop:wnNNNNNNN
    \else:
      \exp_after:wN \@@_collect_end:wn
    \fi:
    #1 ;
  }
\cs_new:Npn \@@_collect_loop:wnNNNNNNN #1; #2 #3#4#5#6#7#8#9
  {
    \exp_after:wN \@@_collect_loop:wn
    \int_value:w \int_eval:n { #1 - 7 } ;
    { #2 #3#4#5#6#7#8#9 }
  }
\cs_new:Npn \@@_collect_end:wn #1 ;
  {
    \exp_after:wN \@@_collect_end:nnnnnnnnw
    \if_case:w
        \if_int_compare:w #1 > \c_zero_int #1 \else: 0 \fi:
        \exp_stop_f:
      \or: \or: \or: \or: \or: \or:
    \fi:
  }
\cs_new:Npn \@@_collect_end:nnnnnnnnw #1#2#3#4#5#6#7#8 #9 \s_@@_stop
  { #1#2#3#4#5#6#7#8 }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Counting characters}
%
% \begin{macro}[EXP]
%   {\str_count_spaces:N, \str_count_spaces:c, \str_count_spaces:n}
% \begin{macro}[EXP]{\@@_count_spaces_loop:w}
%   To speed up this function, we grab and discard $9$ space-delimited
%   arguments in each iteration of the loop.  The loop stops when the
%   last argument is one of the trailing |X|\meta{number}, and that
%   \meta{number} is added to the sum of $9$ that precedes, to adjust
%   the result.
%    \begin{macrocode}
\cs_new:Npn \str_count_spaces:N
  { \exp_args:No \str_count_spaces:n }
\cs_generate_variant:Nn \str_count_spaces:N { c }
\cs_new:Npn \str_count_spaces:n #1
  {
    \int_eval:n
      {
        \exp_after:wN \@@_count_spaces_loop:w
        \tl_to_str:n {#1} ~
        X 7 ~ X 6 ~ X 5 ~ X 4 ~ X 3 ~ X 2 ~ X 1 ~ X 0 ~ X -1 ~
        \s_@@_stop
      }
  }
%    \end{macrocode}
%   Each iteration tests whether its ninth argument starts with the
%   marker~|X|.  If it does, the \meta{number} that follows the marker
%   is left in the integer expression and the rest of the input up to
%   \cs{s_@@_stop} is removed; otherwise $9$ is added and the loop
%   continues.
%    \begin{macrocode}
\cs_new:Npn \@@_count_spaces_loop:w #1~#2~#3~#4~#5~#6~#7~#8~#9~
  {
    \if_meaning:w X #9
      \@@_use_i_delimit_by_s_stop:nw
    \fi:
    9 + \@@_count_spaces_loop:w
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {\str_count:N, \str_count:c, \str_count:n, \str_count_ignore_spaces:n}
% \begin{macro}[EXP]{\@@_count:n}
% \begin{macro}[EXP]{\@@_count_aux:n, \@@_count_loop:NNNNNNNNN}
%   To count characters in a string we could first escape all spaces
%   using \cs{__kernel_str_to_other:n}, then pass the result to \cs{tl_count:n}.
%   However, the escaping step would be quadratic in the number of
%   characters in the string, and we can do better.  Namely, sum the
%   number of spaces (\cs{str_count_spaces:n}) and the result of
%   \cs{tl_count:n}, which ignores spaces.  Since strings tend to be
%   longer than token lists, we use specialized functions to count
%   characters ignoring spaces.  Namely, loop, grabbing $9$ non-space
%   characters at each step, and end as soon as we reach one of the $9$
%   trailing items.  The internal function \cs{@@_count:n}, used in
%   \cs{str_item:nn} and \cs{str_range:nnn}, is similar to
%   \cs{str_count_ignore_spaces:n} but expects its argument to already
%   be a string or a string with spaces escaped.
%    \begin{macrocode}
\cs_new:Npn \str_count:N { \exp_args:No \str_count:n }
\cs_generate_variant:Nn \str_count:N { c }
\cs_new:Npn \str_count:n #1
  {
    \@@_count_aux:n
      {
        \str_count_spaces:n {#1}
        + \exp_after:wN \@@_count_loop:NNNNNNNNN \tl_to_str:n {#1}
      }
  }
\cs_new:Npn \@@_count:n #1
  {
    \@@_count_aux:n
      { \@@_count_loop:NNNNNNNNN #1 }
  }
\cs_new:Npn \str_count_ignore_spaces:n #1
  {
    \@@_count_aux:n
      { \exp_after:wN \@@_count_loop:NNNNNNNNN \tl_to_str:n {#1} }
  }
%    \end{macrocode}
%   The auxiliary \cs{@@_count_aux:n} wraps its argument in
%   \cs{int_eval:n} and appends the nine trailing items |{X 8}| to
%   |{X 0}| followed by \cs{s_@@_stop}.  The loop adds $9$ for every
%   nine items grabbed; once the ninth item starts with~|X|, the digit
%   after that |X| is left as the last summand and everything up to
%   \cs{s_@@_stop} is discarded.
%    \begin{macrocode}
\cs_new:Npn \@@_count_aux:n #1
  {
    \int_eval:n
      {
        #1
        { X 8 } { X 7 } { X 6 }
        { X 5 } { X 4 } { X 3 }
        { X 2 } { X 1 } { X 0 }
        \s_@@_stop
      }
  }
\cs_new:Npn \@@_count_loop:NNNNNNNNN #1#2#3#4#5#6#7#8#9
  {
    \if_meaning:w X #9
      \exp_after:wN \@@_use_none_delimit_by_s_stop:w
    \fi:
    9 + \@@_count_loop:NNNNNNNNN
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{The first character in a string}
%
% \begin{macro}[EXP]
%   {\str_head:N, \str_head:c, \str_head:n, \str_head_ignore_spaces:n}
% \begin{macro}[EXP]{\@@_head:w}
%   The \texttt{_ignore_spaces} variant applies \cs{tl_to_str:n} then
%   grabs the first item, thus skipping spaces.
%   As usual, \cs{str_head:N} expands its argument and
%   hands it to \cs{str_head:n}.  To circumvent the fact that \TeX{}
%   skips spaces when grabbing undelimited macro parameters,
%   \cs{@@_head:w} takes an argument delimited by a space.  If |#1|
%   starts with a non-space character, \cs{@@_use_i_delimit_by_s_stop:nw}
%   leaves that in the input stream.  On the other hand, if |#1| starts
%   with a space, the \cs{@@_head:w} takes an empty argument, and the
%   single (initially braced) space in the definition of \cs{@@_head:w}
%   makes its way to the output.  Finally, for an empty argument, the
%   (braced) empty brace group in the definition of \cs{str_head:n}
%   gives an empty result after passing through
%   \cs{@@_use_i_delimit_by_s_stop:nw}.
%    \begin{macrocode}
\cs_new:Npn \str_head:N { \exp_args:No \str_head:n }
\cs_generate_variant:Nn \str_head:N { c }
\cs_new:Npn \str_head:n #1
  {
    \exp_after:wN \@@_head:w
    \tl_to_str:n {#1}
    { { } } ~ \s_@@_stop
  }
\cs_new:Npn \@@_head:w #1 ~ %
  { \@@_use_i_delimit_by_s_stop:nw #1 { ~ } }
\cs_new:Npn \str_head_ignore_spaces:n #1
  {
    \exp_after:wN \@@_use_i_delimit_by_s_stop:nw
    \tl_to_str:n {#1} { } \s_@@_stop
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {\str_tail:N, \str_tail:c, \str_tail:n, \str_tail_ignore_spaces:n}
% \begin{macro}[EXP]{\@@_tail_auxi:w, \@@_tail_auxii:w}
%   Getting the tail is a little bit more convoluted than the head of a
%   string.  We hit the front of the string with \cs{reverse_if:N}
%   \cs{if_charcode:w} \cs{scan_stop:}.  This removes the first
%   character, and necessarily makes the test true, since the character
%   cannot match \cs{scan_stop:}.  The auxiliary function then inserts
%   the required \cs{fi:} to close the conditional, and leaves the tail
%   of the string in the input stream.  The details are such that an
%   empty string has an empty tail (this requires in particular that the
%   end-marker |X| be unexpandable and not a control sequence).  The
%   \texttt{_ignore_spaces} variant is rather simpler: after converting
%   the input to a string, \cs{@@_tail_auxii:w} removes one undelimited
%   argument and leaves everything else until an end-marker \cs{s_@@_mark}.
%   One can check that an empty (or blank) string yields an empty
%   tail.
%    \begin{macrocode}
\cs_new:Npn \str_tail:N { \exp_args:No \str_tail:n }
\cs_generate_variant:Nn \str_tail:N { c }
\cs_new:Npn \str_tail:n #1
  {
    \exp_after:wN \@@_tail_auxi:w
    \reverse_if:N \if_charcode:w
        \scan_stop: \tl_to_str:n {#1} X X \s_@@_stop
  }
\cs_new:Npn \@@_tail_auxi:w #1 X #2 \s_@@_stop { \fi: #1 }
\cs_new:Npn \str_tail_ignore_spaces:n #1
  {
    \exp_after:wN \@@_tail_auxii:w
    \tl_to_str:n {#1} \s_@@_mark \s_@@_mark \s_@@_stop
  }
\cs_new:Npn \@@_tail_auxii:w #1 #2 \s_@@_mark #3 \s_@@_stop { #2 }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{String manipulation}
%
% \begin{macro}[EXP]
%   {
%     \str_casefold:n, \str_casefold:V,
%     \str_lowercase:n, \str_lowercase:f,
%     \str_uppercase:n, \str_uppercase:f
%   }
% \begin{macro}[EXP]{\@@_change_case:nn}
% \begin{macro}[EXP]{\@@_change_case_aux:nn}
% \begin{macro}[EXP]{\@@_change_case_result:n}
% \begin{macro}[EXP]{\@@_change_case_output:nw, \@@_change_case_output:fw}
% \begin{macro}[EXP]{\@@_change_case_end:wn}
% \begin{macro}[EXP]{\@@_change_case_loop:nw}
% \begin{macro}[EXP]{\@@_change_case_space:n}
% \begin{macro}[EXP]
%   {\@@_change_case_char:nN, \@@_change_case_char_auxi:nN, \@@_change_case_char_auxii:nN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nNN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nNNN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nNNNN}
% \begin{macro}[EXP]{\@@_change_case_char:nnn, \@@_change_case_char_aux:nnn}
% \begin{macro}[EXP]{\@@_change_case_char:nnnnn}
%   Case changing for programmatic reasons is done by first detokenizing
%   input then doing a simple loop that only has to worry about spaces
%   and everything else.  The output is detokenized to allow data sharing
%   with text-based case changing.  Similarly, for $8$-bit engines the
%   multi-byte information is shared.
%    \begin{macrocode}
\cs_new:Npn \str_casefold:n  #1 { \@@_change_case:nn {#1} { casefold } }
\cs_new:Npn \str_lowercase:n #1 { \@@_change_case:nn {#1} { lowercase } }
\cs_new:Npn \str_uppercase:n #1 { \@@_change_case:nn {#1} { uppercase } }
\cs_generate_variant:Nn \str_casefold:n  { V }
\cs_generate_variant:Nn \str_lowercase:n { f }
\cs_generate_variant:Nn \str_uppercase:n { f }
%    \end{macrocode}
%   The input is detokenized first, then the loop is started with the
%   case-changing type as its first argument.  The result is
%   accumulated in the argument of \cs{@@_change_case_result:n}, which
%   sits after the \cs{q_@@_recursion_stop} marker:
%   \cs{@@_change_case_output:nw} appends to it and
%   \cs{@@_change_case_end:wn} finally delivers it through
%   \cs{tl_to_str:n}.
%    \begin{macrocode}
\cs_new:Npn \@@_change_case:nn #1
  {
    \exp_after:wN \@@_change_case_aux:nn \exp_after:wN
      { \tl_to_str:n {#1} }
  }
\cs_new:Npn \@@_change_case_aux:nn #1#2
  {
    \@@_change_case_loop:nw {#2} #1 \q_@@_recursion_tail \q_@@_recursion_stop
      \@@_change_case_result:n { }
  }
\cs_new:Npn \@@_change_case_output:nw #1#2 \@@_change_case_result:n #3
  { #2 \@@_change_case_result:n { #3 #1 } }
\cs_generate_variant:Nn \@@_change_case_output:nw { f }
\cs_new:Npn \@@_change_case_end:wn #1 \@@_change_case_result:n #2
  { \tl_to_str:n {#2} }
%    \end{macrocode}
%   The loop distinguishes only a leading space from any other leading
%   token.  A space is copied to the output unchanged.
%    \begin{macrocode}
\cs_new:Npn \@@_change_case_loop:nw #1#2 \q_@@_recursion_stop
  {
    \tl_if_head_is_space:nTF {#2}
      { \@@_change_case_space:n }
      { \@@_change_case_char:nN }
    {#1} #2 \q_@@_recursion_stop
  }
\exp_last_unbraced:NNNNo
  \cs_new:Npn \@@_change_case_space:n #1 \c_space_tl
  {
    \@@_change_case_output:nw { ~ }
    \@@_change_case_loop:nw {#1}
  }
\cs_new:Npn \@@_change_case_char:nN #1#2
  {
    \@@_if_recursion_tail_stop_do:Nn #2
      { \@@_change_case_end:wn }
    \@@_change_case_codepoint:nN {#1} #2
  }
%    \end{macrocode}
%   When \XeTeX{} or \LuaTeX{} is in use, the character code of the
%   token is used directly as the codepoint.  Otherwise a character
%   code above |"80| is treated as the lead byte of a UTF-8 sequence
%   (unless \cs{tex_pdftexversion:D} does not exist and the code is
%   above |"FF|, in which case it is again used directly).  A lead
%   byte below |"E0| starts a two-byte sequence, one below |"F0| a
%   three-byte sequence, and any other a four-byte sequence; the
%   auxiliaries \cs{@@_change_case_codepoint:nNN},
%   \cs{@@_change_case_codepoint:nNNN} and
%   \cs{@@_change_case_codepoint:nNNNN} grab the corresponding number
%   of bytes (one |N| per byte) and rebuild the codepoint from them.
%    \begin{macrocode}
\if_int_compare:w 0
  \cs_if_exist:NT \tex_XeTeXversion:D { 1 }
  \cs_if_exist:NT \tex_luatexversion:D { 1 }
    > 0 \exp_stop_f:
  \cs_new:Npn \@@_change_case_codepoint:nN #1#2
    { \@@_change_case_char:fnn { \int_eval:n {`#2} } {#1} {#2} }
\else:
  \cs_new:Npe \@@_change_case_codepoint:nN #1#2
    {
      \exp_not:N \int_compare:nNnTF {`#2} > { "80 }
        {
          \cs_if_exist:NTF \tex_pdftexversion:D
            { \exp_not:N \@@_change_case_char_auxi:nN }
            {
              \exp_not:N \int_compare:nNnTF {`#2} > { "FF }
                { \exp_not:N \@@_change_case_char_auxii:nN }
                { \exp_not:N \@@_change_case_char_auxi:nN }
            }
        }
        { \exp_not:N \@@_change_case_char_auxii:nN }
          {#1} #2
    }
  \cs_new:Npn \@@_change_case_char_auxi:nN #1#2
    {
      \int_compare:nNnTF {`#2} < { "E0 }
        { \@@_change_case_codepoint:nNN }
        {
          \int_compare:nNnTF {`#2} < { "F0 }
            { \@@_change_case_codepoint:nNNN }
            { \@@_change_case_codepoint:nNNNN }
        }
          {#1} #2
    }
  \cs_new:Npn \@@_change_case_char_auxii:nN #1#2
    { \@@_change_case_char:fnn { \int_eval:n {`#2} } {#1} {#2} }
  \cs_new:Npn \@@_change_case_codepoint:nNN #1#2#3
    {
      \@@_change_case_char:fnn
        { \int_eval:n { (`#2 - "C0) * "40 + `#3 - "80 } }
        {#1} {#2#3}
    }
  \cs_new:Npn \@@_change_case_codepoint:nNNN #1#2#3#4
    {
      \@@_change_case_char:fnn
        {
          \int_eval:n
            { (`#2 - "E0) * "1000 + (`#3 - "80) * "40 + `#4 - "80 }
        }
        {#1} {#2#3#4}
    }
  \cs_new:Npn \@@_change_case_codepoint:nNNNN #1#2#3#4#5
    {
      \@@_change_case_char:fnn
        {
          \int_eval:n
            {
                (`#2 - "F0) * "40000
              + (`#3 - "80) * "1000
              + (`#4 - "80) * "40
              + `#5 - "80
            }
        }
        {#1} {#2#3#4#5}
    }
\fi:
%    \end{macrocode}
%   With the codepoint~|#1|, the case-changing type~|#2| and the
%   original character(s)~|#3| in hand, look up the mapping with
%   \cs{__kernel_codepoint_case:nn}.  Its expansion supplies the first
%   three arguments of \cs{@@_change_case_char:nnnnn}.  If the first of
%   them equals the original codepoint, the original character(s) are
%   output as they are; otherwise a string is generated from the first
%   mapped codepoint and, when not blank, from the second and third.
%    \begin{macrocode}
\cs_new:Npn \@@_change_case_char:nnn #1#2#3
  {
    \@@_change_case_output:fw
      {
        \exp_args:Ne \@@_change_case_char_aux:nnn
          { \__kernel_codepoint_case:nn {#2} {#1} } {#1} {#3}
      }
    \@@_change_case_loop:nw {#2}
  }
\cs_generate_variant:Nn \@@_change_case_char:nnn { f }
\cs_new:Npn \@@_change_case_char_aux:nnn #1#2#3
  {
    \use:e
      { \@@_change_case_char:nnnnn #1 {#2} {#3} }
  }
\cs_new:Npn \@@_change_case_char:nnnnn #1#2#3#4#5
  {
    \int_compare:nNnTF {#1} = {#4}
      { \tl_to_str:n {#5} }
      {
        \codepoint_str_generate:n {#1}
        \tl_if_blank:nF {#2}
          {
            \codepoint_str_generate:n {#2}
            \tl_if_blank:nF {#3}
              { \codepoint_str_generate:n {#3} }
          }
      }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\str_mdfive_hash:n, \str_mdfive_hash:e}
%   The \texttt{n}-type version passes the string representation of
%   its argument to \cs{tex_mdfivesum:D}; the \texttt{e}-type version
%   passes its argument as it is.
%    \begin{macrocode}
\cs_new:Npn \str_mdfive_hash:n #1 { \tex_mdfivesum:D { \tl_to_str:n {#1} } }
\cs_new:Npn \str_mdfive_hash:e #1 { \tex_mdfivesum:D {#1} }
%    \end{macrocode}
% \end{macro}
%
% \begin{variable}
%   {
%     \c_ampersand_str,
%     \c_atsign_str,
%     \c_backslash_str,
%     \c_left_brace_str,
%     \c_right_brace_str,
%     \c_circumflex_str,
%     \c_colon_str,
%     \c_dollar_str,
%     \c_hash_str,
%     \c_percent_str,
%     \c_tilde_str,
%     \c_underscore_str,
%     \c_zero_str
%   }
%   For all of those strings, use \cs{cs_to_str:N} to get characters with
%   the correct category code without worries.  The only exception is
%   \cs{c_zero_str}, whose content is given literally.
%    \begin{macrocode}
\str_const:Ne \c_ampersand_str   { \cs_to_str:N \& }
\str_const:Ne \c_atsign_str      { \cs_to_str:N \@ }
\str_const:Ne \c_backslash_str   { \cs_to_str:N \\ }
\str_const:Ne \c_left_brace_str  { \cs_to_str:N \{ }
\str_const:Ne \c_right_brace_str { \cs_to_str:N \} }
\str_const:Ne \c_circumflex_str  { \cs_to_str:N \^ }
\str_const:Ne \c_colon_str       { \cs_to_str:N \: }
\str_const:Ne \c_dollar_str      { \cs_to_str:N \$ }
\str_const:Ne \c_hash_str        { \cs_to_str:N \# }
\str_const:Ne \c_percent_str     { \cs_to_str:N \% }
\str_const:Ne \c_tilde_str       { \cs_to_str:N \~ }
\str_const:Ne \c_underscore_str  { \cs_to_str:N \_ }
\str_const:Ne \c_zero_str        { 0 }
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\c_empty_str}
%   An empty string is simply an empty token list.
%    \begin{macrocode}
\cs_new_eq:NN \c_empty_str \c_empty_tl
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_tmpa_str, \l_tmpb_str, \g_tmpa_str, \g_tmpb_str}
%   Scratch strings.
%    \begin{macrocode}
\str_new:N \l_tmpa_str
\str_new:N \l_tmpb_str
\str_new:N \g_tmpa_str
\str_new:N \g_tmpb_str
%    \end{macrocode}
% \end{variable}
%
% \subsection{Viewing strings}
%
% \begin{macro}{\str_show:n, \str_show:N, \str_show:c}
% \begin{macro}{\str_log:n, \str_log:N, \str_log:c}
%   Displays a string on the terminal.
%    \begin{macrocode}
\cs_new_eq:NN \str_show:n \tl_show:n
\cs_new_protected:Npn \str_show:N #1
  {
    \__kernel_chk_tl_type:NnnT #1 { str } { \tl_to_str:N #1 }
      { \tl_show:N #1 }
  }
\cs_generate_variant:Nn \str_show:N { c }
%    \end{macrocode}
%   The \texttt{log} functions are built in exactly the same way from
%   \cs{tl_log:n} and \cs{tl_log:N}.  In both \texttt{N}-type functions
%   the token list function is only reached through the \texttt{T}
%   branch of \cs{__kernel_chk_tl_type:NnnT}, which receives the
%   variable, the type name \texttt{str} and the string representation
%   of the variable.
%    \begin{macrocode}
\cs_new_eq:NN \str_log:n \tl_log:n
\cs_new_protected:Npn \str_log:N #1
  {
    \__kernel_chk_tl_type:NnnT #1 { str } { \tl_to_str:N #1 }
      { \tl_log:N #1 }
  }
\cs_generate_variant:Nn \str_log:N { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
%    \begin{macrocode}
%
%    \end{macrocode}
%
% \end{implementation}
%
% \PrintIndex