%Version 3.1 December 2024
% See section 11 of the User Manual for version history
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                                                 %%
%% Please do not use \input{...} to include other tex files.       %%
%% Submit your LaTeX manuscript as one .tex document.              %%
%%                                                                 %%
%% All additional figures and files should be attached             %%
%% separately and not embedded in the \TeX\ document itself.       %%
%%                                                                 %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%\documentclass[referee,sn-basic]{sn-jnl}% referee option is meant for double line spacing

%%=======================================================%%
%% to print line numbers in the margin use lineno option %%
%%=======================================================%%

%%\documentclass[lineno,pdflatex,sn-basic]{sn-jnl}% Basic Springer Nature Reference Style/Chemistry Reference Style

%%=========================================================================================%%
%% the documentclass is set to pdflatex as default. You can delete it if not appropriate.  %%
%%=========================================================================================%%

%%\documentclass[sn-basic]{sn-jnl}% Basic Springer Nature Reference Style/Chemistry Reference Style

%%Note: the following reference styles support Namedate and Numbered referencing. By default the style follows the most common style. To switch between the options you can add or remove Numbered in the optional parenthesis. 
%%The option is available for: sn-basic.bst, sn-chicago.bst%  
 
%%\documentclass[pdflatex,sn-nature]{sn-jnl}% Style for submissions to Nature Portfolio journals
\documentclass[pdflatex,sn-basic]{sn-jnl}% Basic Springer Nature Reference Style/Chemistry Reference Style
%%\documentclass[pdflatex,sn-mathphys-num]{sn-jnl}% Math and Physical Sciences Numbered Reference Style
%%\documentclass[pdflatex,sn-mathphys-ay]{sn-jnl}% Math and Physical Sciences Author Year Reference Style
%%\documentclass[pdflatex,sn-aps]{sn-jnl}% American Physical Society (APS) Reference Style
%%\documentclass[pdflatex,sn-vancouver-num]{sn-jnl}% Vancouver Numbered Reference Style
%%\documentclass[pdflatex,sn-vancouver-ay]{sn-jnl}% Vancouver Author Year Reference Style
%%\documentclass[pdflatex,sn-apa]{sn-jnl}% APA Reference Style
%%\documentclass[pdflatex,sn-chicago]{sn-jnl}% Chicago-based Humanities Reference Style

%%%% Standard Packages
%%<additional latex packages if required can be included here>

\usepackage{graphicx}%
\usepackage{multirow}%
\usepackage{amsmath,amssymb,amsfonts}%
\usepackage{amsthm}%
\usepackage{mathrsfs}%
\usepackage[title]{appendix}%
\usepackage{xcolor}%
\usepackage{textcomp}%
\usepackage{manyfoot}%
\usepackage{booktabs}%
\usepackage{algorithm}%
\usepackage{algorithmicx}%
\usepackage{algpseudocode}%
\usepackage{listings}%
\usepackage[acronym]{glossaries}

\usepackage{todonotes}

%%%%

\newacronym{SAR}{SAR}{structure-activity relationship}
\newacronym{GPCR}{GPCR}{G-protein-coupled receptor}
\newacronym{LBVS}{LBVS}{ligand-based virtual screening}
\newacronym{SBVS}{SBVS}{structure-based virtual screening}
\newacronym{ECFP}{ECFP}{extended-connectivity fingerprints}
\newacronym{QSAR}{QSAR}{quantitative structure-activity relationship}
\newacronym{EF1}{EF@1\%}{enrichment factor at 1\%}
\newacronym{ROC}{ROC}{receiver operating characteristic}
\newacronym{AUC}{AUC}{area under the curve}
\newacronym{PR}{PR}{precision-recall}
\newacronym{RIE}{RIE}{robust initial enhancement}
\newacronym{BEDROC}{BEDROC}{Boltzmann-enhanced discrimination of ROC}
\newacronym{MF}{MF}{molecular function}
\newacronym{RS}{RS}{reference set}
\newacronym{UMAP}{UMAP}{uniform manifold approximation and projection}
\newacronym{PCA}{PCA}{principal component analysis}
\newacronym{SVM}{SVM}{support vector machine}
\newacronym{1-NN}{1-NN}{1-nearest-neighbor}

%%%%
%% as per the requirement new theorem styles can be included as shown below
\theoremstyle{thmstyleone}%
\newtheorem{theorem}{Theorem}%  meant for continuous numbers
%%\newtheorem{theorem}{Theorem}[section]% meant for sectionwise numbers
%% optional argument [theorem] produces theorem numbering sequence instead of independent numbers for Proposition
\newtheorem{proposition}[theorem]{Proposition}% 
%%\newtheorem{proposition}{Proposition}% to get separate numbers for theorem and proposition etc.

\theoremstyle{thmstyletwo}%
\newtheorem{example}{Example}%
\newtheorem{remark}{Remark}%

\theoremstyle{thmstylethree}%
\newtheorem{definition}{Definition}%

\raggedbottom
%%\unnumbered% uncomment this for unnumbered level heads

\begin{document}

\title{Supplementary Material for:\\
``Molecular Function-guided Unsupervised Similarity-based Enrichment for Low-Data Virtual Screening''}

\author*[1,2]{\fnm{Alexander} \sur{Hagg}}\email{alexander.hagg@h-brs.de}

\author[1,2,3]{\fnm{Dirk} \sur{Reith}}\email{dirk.reith@h-brs.de}

\author[2,4,5]{\fnm{Matthias} \sur{Preller}}\email{matthias.preller@h-brs.de}

\author[2,6]{\fnm{Karl N.} \sur{Kirschner}}\email{karl.kirschner@h-brs.de}


\affil*[1]{\orgdiv{Department of Engineering and Communication}, \orgname{University of Applied Sciences Bonn-Rhein-Sieg}, \orgaddress{\street{Grantham-Allee 20}, \city{Sankt Augustin}, \postcode{53757}, \country{Germany}}}

\affil*[2]{\orgdiv{Institute of Technology, Resource and Energy-efficient Engineering}, \orgname{University of Applied Sciences Bonn-Rhein-Sieg}, \orgaddress{\street{Grantham-Allee 20}, \city{Sankt Augustin}, \postcode{53757}, \country{Germany}}}

\affil[3]{\orgname{Fraunhofer Institute for Algorithms and Scientific Computing}, \orgaddress{\street{Schloss Birlinghoven}, \city{Sankt Augustin}, \postcode{53754}, \country{Germany}}}

\affil[4]{\orgdiv{Department of Natural Sciences}, \orgname{University of Applied Sciences Bonn-Rhein-Sieg}, \orgaddress{\street{Von-Liebig-Straße 20}, \city{Rheinbach}, \postcode{53359}, \country{Germany}}}

\affil[5]{\orgdiv{Institute for Functional Gene Analytics}, \orgname{University of Applied Sciences Bonn-Rhein-Sieg}, \orgaddress{\street{Von-Liebig-Straße 20}, \city{Rheinbach}, \postcode{53359}, \country{Germany}}}

\affil*[6]{\orgdiv{Department of Computer Science}, \orgname{University of Applied Sciences Bonn-Rhein-Sieg}, \orgaddress{\street{Grantham-Allee 20}, \city{Sankt Augustin}, \postcode{53757}, \country{Germany}}}

\maketitle

\section{Extended Related Work}\label{SI:related_work}
This section provides extended context on virtual screening metrics, baseline screening performance, deep learning architectures, and dimensionality reduction methodologies as documented in recent literature.

\subsection{Metrics and Benchmarks in Virtual Screening}
Quantifying the ability of virtual screening methods to prioritize active compounds -- known as enrichment -- is fundamental to evaluating their utility in early-stage drug discovery. While the literature presents a diverse array of evaluation metrics and benchmark datasets \cite{mcgaughey2007comparison, sciabola2022critical}, which often complicate direct cross-study comparisons, robust metrics such as the \gls{EF1} have become standard for assessing early retrieval success. Since the datasets used in previous studies differ from the molecular function-guided configuration employed here, the performance figures below provide a general context for indirect comparison rather than a direct benchmark.

A critical analysis of \gls{SBVS} benchmarks highlights that established resources such as the Directory of Useful Decoys (DUD) and its enhanced version (DUD-E) \cite{mysinger2012directory} exhibit severe analogue and decoy biases \cite{rohrer2009maximum}. These systematic biases can inflate performance estimates by 2--5 times relative to more recent, unbiased benchmarks such as LIT-PCBA \cite{tran2020lit}.

\subsection{Advanced Virtual Screening Scoring Functions and Ensembles}
When experimental target data is known, specialized scoring functions have reported high enrichments on standardized benchmarks. For example, the Similarity of Interaction Energy VEctor Score (SIEVE-Score), which incorporates molecular docking within its workflow, achieved a median \gls{EF1} of 44.9 (10\%/90\% percentiles [27.3, 60.0]) on a subset of DUD-E targets, substantially outperforming classical Glide docking \cite{yasuo2019improved}. In comparison, the machine-learned RF-Score on the same subset yielded median \gls{EF1} values of 18.7 (10\%/90\% percentiles [4.1, 43.0]) and 38.6 (10\%/90\% percentiles [13.8, 58.9]) depending on training configuration \cite{yasuo2019improved}. 

Hybrid screening approaches have also demonstrated elevated performance. A protocol combining ensemble docking with electrostatic comparisons (e-Sim) \cite{cleves2019electrostatic} achieved a median \gls{EF1} of 46.1 across a 96-protein DUD-E subset \cite{cleves2020structure}. Furthermore, the ensemble learning method ENS-VS -- which integrates \glspl{SVM}, a decision tree, and a Fisher linear discriminant -- achieved a median \gls{EF1} of 57.0 (10\%/90\% percentiles [51.2, 63.9]) on DUD-E, compared to AutoDock Vina alone, which reached 4.7 (10\%/90\% percentiles [3.3, 17.7]) \cite{li2020improved}. When evaluated on the DEKOIS dataset \cite{vogel2011dekois}, ENS-VS yielded a median \gls{EF1} of 29.5 (10\%/90\% percentiles [21.2, 37.9]), compared to Vina (2.4), Glide (9.5), SIEVE-Score (28.1), and RF-ScoreVS\_v3\_vina (16.6) \cite{li2020improved}.

\subsection{Deep Learning Representation Models}
Modern deep learning architectures focus on navigating high-dimensional molecular descriptor spaces or generating latent representations of these spaces. Recent Graph Neural Networks (GNNs) combining learned topologies with expert-curated descriptors have been trained on large-scale bioactivity datasets containing between 60,000 and 350,000 compounds \cite{liu2025advancements}. 

Similarly, Transformer models adapted for chemistry learn fundamental structural rules through pre-training on millions of unlabeled chemical structures \cite{MswahiliJ2024, BanerjeeKRPSKGB2025, UmerNALAUF2025}. Models like ChemBERTa \cite{chithrananda2020chemberta} are designed for fine-tuning on diverse downstream tasks. Another prominent transformer model, MolBERT \cite{fabian2020molecular}, designed to produce generalized molecular representations, achieved a \gls{BEDROC} score of 0.344 $\pm$ 0.062 across 69 unique protein datasets \cite{fabian2020molecular}. While computationally powerful, these frameworks generally demand substantial labeled training data and high-performance computing resources, and their internal mechanisms remain highly complex to interpret compared to distance-based metric retrieval.

\subsection{Dimensionality Reduction Paradigms in Cheminformatics}
Chemical descriptor packages often generate high-dimensional representations. Dimensionality reduction is critical to compress these spaces, extract latent chemical features, and permit visualization \cite{NguyenH2019}.

Linear dimensionality reduction techniques like \gls{PCA} generate embeddings by projecting data along directions of maximal variance \cite{oprea2001chemography}. However, because \gls{PCA} relies entirely on linear transformations, it frequently struggles to maintain the local, non-linear neighborhood structures that dictate molecular similarity and biological activity \cite{orlov2025high, lovric2021should}.

Conversely, non-linear techniques like t-distributed Stochastic Neighbor Embedding (t-SNE) \cite{van2008visualizing} and \gls{UMAP} \cite{mcinnes2018umap} resolve some of these limitations but utilize different mathematical frameworks. t-SNE focuses heavily on preserving local distances, which often causes global spatial relationships and inter-cluster distances between distinct chemotypes to lose quantitative meaning \cite{kobak2019art}. In contrast, \gls{UMAP} relies on Riemannian geometry and algebraic topology, allowing it to preserve local neighborhoods while maintaining global structure. This balance makes relative cluster positioning more physically interpretable. Furthermore, \gls{UMAP} is computationally faster, scales more effectively to large datasets, and offers standard formulations to project novel, out-of-sample molecules into an existing embedding space without requiring model retraining \cite{becht2019dimensionality}.

While manifold learning approaches are routinely utilized for visual representation \cite{probst2020visualization, cihan2022chemplot, orlov2025high} and clustering \cite{Hernandez-HernandezB2023, GuoHB2024, GuoHB2025}, their application as a direct distance-based scoring metric for virtual screening retrieval remains a relatively underexplored paradigm compared to classical fingerprint comparisons or linear projections.

\section{Molecular Descriptors}

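The 1,611 Mordred 2D molecular descriptors used in this work are organised into twelve categories in Tables~\ref{tab:desc1}--\ref{tab:desc5}.
% NOTE(review): the tables below carry 14 category headings (Constitutional; Graph-Theoretical and Physicochemical; Autocorrelation; BCUT; Matrix Spectral; Connectivity; E-State; Surface Area; Rings & Walks; Information; Distance Codes; Topological Charge; Path/Walk Counts; Topochemical (ETA)) -- confirm whether "twelve" is still correct.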

The autocorrelation weight codes used throughout are:
$c$ = Gasteiger charge,
$dv$ = valence electrons,
$d$ = sigma electrons,
$s$ = intrinsic state,
$Z$ = atomic number,
$m$ = atomic mass,
$v$ = van-der-Waals volume,
$se$ = Sanderson electronegativity,
$pe$ = Pauling electronegativity,
$are$ = Allred--Rochow electronegativity,
$p$ = polarizability,
$i$ = first ionization potential.

% -----------------------------------------------------------------------
% Table 1a: Constitutional Descriptors
% -----------------------------------------------------------------------
\begin{table}[htbp]
\centering
\caption{Constitutional Descriptors.}
\label{tab:desc1}
\small
\begin{tabular}{p{4.5cm}p{7.5cm}}
\hline
\textbf{Descriptor(s)} & \textbf{Description} \\ \hline
$MW$, $AMW$ & Molecular weight and average molecular weight. \\
$nAtom$, $nHeavyAtom$, $nH$ & Total atom count, heavy atom count, hydrogen count. \\
$nHetero$, $nX$ & Heteroatom and halogen counts. \\
$nB$, $nC$, $nN$, $nO$, $nS$, $nP$ & Per-element atom counts (B, C, N, O, S, P). \\
$nF$, $nCl$, $nBr$, $nI$ & Halogen atom counts (F, Cl, Br, I). \\
$nAromAtom$, $nAromBond$ & Aromatic atom and bond counts. \\
$nSpiro$, $nBridgehead$ & Spiro and bridgehead atom counts. \\
$nAcid$, $nBase$ & Number of acidic and basic functional groups. \\
$nHBAcc$, $nHBDon$ & Hydrogen-bond acceptor and donor counts. \\
$nRot$, $RotRatio$ & Rotatable bond count and ratio. \\
$nBonds$, $nBondsO$, $nBondsS$ & Total bond count, bonds between heavy atoms only, and single bond count. \\
$nBondsD$, $nBondsT$, $nBondsA$ & Double, triple, and aromatic bond counts. \\
$nBondsM$, $nBondsKS$, $nBondsKD$ & Multiple, Kekulé-single, and Kekulé-double bond counts. \\
$RNCG$, $RPCG$ & Relative negative/positive charge (most charged atom fraction). \\
$FCSP3$, $HybRatio$ & Fraction of $sp^3$ carbons; ratio of $sp^3$ atoms to heavy atoms. \\
$C1SP1$--$C4SP3$ & Carbon counts by hybridization state and substitution ($sp$, $sp^2$, $sp^3$). \\
$fragCpx$, $fMF$ & Fragment complexity and molecular framework fraction. \\
$apol$, $bpol$ & Sum of atomic and bond polarizabilities. \\
$VMcGowan$, $Vabc$, $VAdjMat$ & McGowan characteristic volume, ABC volume, adjacency-matrix volume. \\
$LabuteASA$ & Labute approximate surface area. \\ \hline
\end{tabular}
\end{table}

% -----------------------------------------------------------------------
% Table 1b: Graph-Theoretical & Physicochemical Descriptors
% -----------------------------------------------------------------------
\begin{table}[htbp]
\centering
\caption{Graph-Theoretical and Physicochemical Descriptors.}
\label{tab:desc1b}
\small
\begin{tabular}{p{4.5cm}p{7.5cm}}
\hline
\textbf{Descriptor(s)} & \textbf{Description} \\ \hline
$ABC$, $ABCGG$ & Atom-Bond Connectivity index and Graovac--Ghorbani variant. \\
$BalabanJ$ & Balaban's connectivity (J) index. \\
$WPath$, $WPol$ & Wiener path index and Wiener polarity index. \\
$Zagreb1$, $Zagreb2$, $mZagreb1$, $mZagreb2$ & First and second Zagreb indices and their modified variants. \\
$BertzCT$ & Bertz complexity index. \\
$ECIndex$ & Eccentric connectivity index. \\
$DetourIndex$ & Detour index (longest-path sum). \\
$Diameter$, $Radius$ & Topological diameter and radius. \\
$TopoShapeIndex$, $PetitjeanIndex$ & Topological and Petitjean shape indices. \\
$SLogP$, $SMR$ & Wildman--Crippen $\log P$ and molar refractivity estimates. \\
$FilterItLogS$ & Filter-it predicted aqueous solubility ($\log S$). \\
$TopoPSA$, $TopoPSA(NO)$ & Topological polar surface area (all heteroatoms; N/O only). \\ \hline
\end{tabular}
\end{table}

% -----------------------------------------------------------------------
% Table 2: Autocorrelation Descriptors
% -----------------------------------------------------------------------
\begin{table}[htbp]
\centering
\caption{Autocorrelation and BCUT Descriptors. Weight codes $w \in \{c, dv, d, s, Z, m, v, se, pe, are, p, i\}$
(see text); lag $k$ ranges as noted.}
\label{tab:desc2}
\small
\begin{tabular}{lp{4.5cm}p{7cm}}
\hline
\textbf{Category} & \textbf{Descriptor(s)} & \textbf{Description} \\ \hline
\multirow{8}{*}{\textbf{Autocorrelation}}
  & $ATS_k w$ & Moreau--Broto autocorrelation, lag $k=0$--$8$;
      weights $w \in \{dv, d, s, Z, m, v, se, pe, are, p, i\}$. \\
  & $AATS_k w$ & Average Moreau--Broto autocorrelation, lag $k=0$--$8$;
      same weights as $ATS$. \\
  & $ATSC_k w$ & Centred Moreau--Broto autocorrelation, lag $k=0$--$8$;
      weights $w \in \{c, dv, d, s, Z, m, v, se, pe, are, p, i\}$. \\
  & $AATSC_k w$ & Average centred Moreau--Broto autocorrelation, lag $k=0$--$8$;
      same weights as $ATSC$. \\
  & $MATS_k w$ & Moran autocorrelation index, lag $k=1$--$8$;
      weights $w \in \{c, dv, d, s, Z, m, v, se, pe, are, p, i\}$. \\
  & $GATS_k w$ & Geary autocorrelation index, lag $k=1$--$8$;
      same weights as $MATS$. \\ \hline
\multirow{2}{*}{\textbf{BCUT}}
  & $BCUTw\text{-}1h$, $BCUTw\text{-}1l$ & Highest and lowest eigenvalues of the
      Burden matrix weighted by property $w \in \{c, dv, d, s, Z, m, v, se, pe, are, p, i\}$
      (24 descriptors total). \\ \hline
\end{tabular}
\end{table}

% -----------------------------------------------------------------------
% Table 3: Matrix Spectral, Connectivity & E-State
% -----------------------------------------------------------------------
\begin{table}[htbp]
\centering
\caption{Matrix Spectral, Connectivity, and E-State Descriptors.}
\label{tab:desc3}
\small
\begin{tabular}{lp{4.5cm}p{7cm}}
\hline
\textbf{Category} & \textbf{Descriptor(s)} & \textbf{Description} \\ \hline
\multirow{10}{*}{\textbf{Matrix Spectral}}
  & $SpAbs\_A$, $SpMax\_A$, $SpDiam\_A$ & Spectral absolute value, maximum eigenvalue, and
      spectral diameter of the adjacency matrix ($A$). \\
  & $SpAD\_A$, $SpMAD\_A$, $LogEE\_A$ & Spectral absolute deviation, mean absolute deviation,
      and log of Estrada index for $A$. \\
  & $VE1\_A$--$VE3\_A$, $VR1\_A$--$VR3\_A$ & Eigenvector-based and Randić-type
      spectral coefficients of $A$. \\
  & $SpAbs\_D$, $SpMax\_D$, $\ldots$, $VR3\_D$ & Analogous spectral descriptors for the
      topological distance matrix ($D$). \\
  & $SpAbs\_Dt$, $SpMax\_Dt$, $\ldots$, $VR3\_Dt$, $SM1\_Dt$ & Spectral descriptors for
      the detour matrix ($Dt$), including $SM1$ (sum of eigenvalues). \\
  & $SpAbs\_Dz w$, $\ldots$, $VR3\_Dz w$, $SM1\_Dz w$ & Spectral descriptors for
      property-weighted distance matrices ($Dz$),
      $w \in \{Z, m, v, se, pe, are, p, i\}$ (8 matrices, 13 stats each = 104 descriptors). \\
  & $SZ$, $Sm$, $Sv$, $Sse$, $Spe$, $Sare$, $Sp$, $Si$ & Sum of atomic property values
      over all heavy atoms ($Z$, $m$, $v$, $se$, $pe$, $are$, $p$, $i$). \\
  & $MZ$, $Mm$, $Mv$, $Mse$, $Mpe$, $Mare$, $Mp$, $Mi$ & Mean of atomic property values. \\ \hline
\multirow{5}{*}{\textbf{Connectivity}}
  & $Xp\text{-}k d$, $Xp\text{-}k dv$ & Path chi indices, orders $k=0$--$7$,
      for $\sigma$-electron ($d$) and valence-electron ($dv$) weights. \\
  & $AXp\text{-}k d$, $AXp\text{-}k dv$ & Average path chi indices, orders $k=0$--$7$. \\
  & $Xc\text{-}k d$, $Xc\text{-}k dv$ & Cluster chi indices, orders $k=3$--$6$. \\
  & $Xpc\text{-}k d$, $Xpc\text{-}k dv$ & Path-cluster chi indices, orders $k=4$--$6$. \\
  & $Xch\text{-}k d$, $Xch\text{-}k dv$ & Chain chi indices, orders $k=3$--$7$. \\
  & $Kier1$, $Kier2$, $Kier3$ & Kier shape indices ($\kappa_1$, $\kappa_2$, $\kappa_3$). \\ \hline
\multirow{5}{*}{\textbf{E-State}}
  & $Ns XY$ & Count of atom type $XY$ (e.g., $NsCH3$, $NaaNH$, $NsOH$);
      covers Li, Be, B, C, N, O, S, P, Si, Ge, As, Se, Sn, Pb, and halogen environments. \\
  & $Ss XY$ & E-State index sum over atom type $XY$. \\
  & $MAXs XY$ & Maximum E-State value over atom type $XY$. \\
  & $MINs XY$ & Minimum E-State value over atom type $XY$. \\
  & $EState\_VSA1$--$10$ & Surface area contributions binned by E-State value. \\
  & $VSA\_EState1$--$9$ & E-State contributions binned by surface area. \\ \hline
\end{tabular}
\end{table}

% -----------------------------------------------------------------------
% Table 4: Surface Area, Ring & Walk, Information Indices
% -----------------------------------------------------------------------
\begin{table}[htbp]
\centering
\caption{Surface Area, Ring/Walk, and Information-Theoretic Descriptors.}
\label{tab:desc4}
\small
\begin{tabular}{lp{4.5cm}p{7cm}}
\hline
\textbf{Category} & \textbf{Descriptor(s)} & \textbf{Description} \\ \hline
\multirow{4}{*}{\textbf{Surface Area}}
  & $SlogP\_VSA1$--$11$ & Surface area contributions binned by Wildman--Crippen $\log P$ values. \\
  & $SMR\_VSA1$--$9$ & Surface area contributions binned by molar refractivity. \\
  & $PEOE\_VSA1$--$13$ & Surface area contributions binned by Gasteiger partial charges. \\ \hline
\multirow{6}{*}{\textbf{Rings \& Walks}}
  & $nRing$, $n3Ring$--$n12Ring$, $nG12Ring$ & Total ring count and ring-size counts
      (3--12 members; $nG12Ring$ for rings $>12$). \\
  & $nHRing$, $n3HRing$--$nG12HRing$ & Heteroatom-containing ring counts by size. \\
  & $naRing$, $n3aRing$--$nG12aRing$ & Aromatic ring counts by size. \\
  & $naHRing$, $n3aHRing$--$nG12aHRing$ & Aromatic heteroatom ring counts by size. \\
  & $nARing$, $nAHRing$ and size variants & Aliphatic ring and aliphatic heteroatom ring counts. \\
  & $nFRing$, $nFHRing$, $nFaRing$, $nFaHRing$, $nFARing$, $nFAHRing$ and size variants
      & Fused-ring counts (fused hetero, aromatic, aliphatic variations) by ring size. \\
  & $MWC01$--$MWC10$, $TMWC10$ & Molecular Walk Counts of lengths 1--10 and total. \\
  & $SRW02$--$SRW10$, $TSRW10$ & Self-Returning Walk counts of lengths 2--10 and total. \\ \hline
\multirow{5}{*}{\textbf{Information}}
  & $IC0$--$IC5$ & Information content indices at graph distance 0--5. \\
  & $TIC0$--$TIC5$ & Total information content indices. \\
  & $SIC0$--$SIC5$ & Structural information content indices. \\
  & $BIC0$--$BIC5$ & Bonding information content indices. \\
  & $CIC0$--$CIC5$ & Complementary information content indices. \\
  & $MIC0$--$MIC5$ & Modified information content indices. \\
  & $ZMIC0$--$ZMIC5$ & Atomic number-weighted modified information content. \\ \hline
\multirow{3}{*}{\textbf{Distance Codes}}
  & $MID$, $AMID$ & Mean information-theoretic distance and its average. \\
  & $MID\_h$, $AMID\_h$ & $MID$ / $AMID$ restricted to heteroatoms. \\
  & $MID\_C$, $AMID\_C$; $MID\_N$, $AMID\_N$; $MID\_O$, $AMID\_O$; $MID\_X$, $AMID\_X$
      & Element-resolved mean distances (C, N, O, halogen). \\ \hline
\end{tabular}
\end{table}

% -----------------------------------------------------------------------
% Table 5: Topological Charge, Walk/Path Counts, ETA, Physicochemical
% -----------------------------------------------------------------------
\begin{table}[htbp]
\centering
\caption{Topological Charge, Walk/Path Counts, and Topochemical (ETA) Descriptors.}
\label{tab:desc5}
\small
\begin{tabular}{lp{4.5cm}p{7cm}}
\hline
\textbf{Category} & \textbf{Descriptor(s)} & \textbf{Description} \\ \hline
\multirow{4}{*}{\textbf{Topological Charge}}
  & $GGI1$--$GGI10$ & Galvez topological charge indices of order 1--10. \\
  & $JGI1$--$JGI10$ & Mean topological charge indices of order 1--10. \\
  & $JGT10$ & Total topological charge index (sum over orders 1--10). \\ \hline
\multirow{4}{*}{\textbf{Path/Walk Counts}}
  & $MPC2$--$MPC10$, $TMPC10$ & Molecular path counts of length 2--10 and total. \\
  & $piPC1$--$piPC10$, $TpiPC10$ & $\pi$-path counts of length 1--10 and total. \\
  & $MDEC\text{-}ij$ & Molecular distance-edge carbon descriptors
      ($ij \in \{11,12,13,14,22,23,24,33,34,44\}$). \\
  & $MDEO\text{-}ij$ & Molecular distance-edge oxygen descriptors
      ($ij \in \{11,12,22\}$). \\
  & $MDEN\text{-}ij$ & Molecular distance-edge nitrogen descriptors
      ($ij \in \{11,12,13,22,23,33\}$). \\ \hline
\multirow{8}{*}{\textbf{Topochemical (ETA)}}
  & $ETA\_alpha$, $AETA\_alpha$ & ETA alpha (composite atom contribution) and average. \\
  & $ETA\_shape\_p$, $ETA\_shape\_y$, $ETA\_shape\_x$ & ETA shape descriptors
      (parabolic, Y-shaped, X-shaped molecular profiles). \\
  & $ETA\_beta$, $AETA\_beta$, $ETA\_beta\_s$, $AETA\_beta\_s$ & ETA beta (total and
      sigma-only) and averages. \\
  & $ETA\_beta\_ns$, $AETA\_beta\_ns$, $ETA\_beta\_ns\_d$, $AETA\_beta\_ns\_d$
      & ETA beta non-sigma contributions and their differences. \\
  & $ETA\_eta$, $AETA\_eta$, $ETA\_eta\_L$, $AETA\_eta\_L$ & ETA eta indices
      (local and average). \\
  & $ETA\_eta\_R$, $AETA\_eta\_R$, $ETA\_eta\_RL$, $AETA\_eta\_RL$ & ETA eta R-branch
      indices and averages. \\
  & $ETA\_eta\_F$, $AETA\_eta\_F$, $ETA\_eta\_FL$, $AETA\_eta\_FL$ & ETA eta F indices
      and averages. \\
  & $ETA\_eta\_B$, $AETA\_eta\_B$, $ETA\_eta\_BR$, $AETA\_eta\_BR$ & ETA eta B indices
      and averages. \\
  & $ETA\_dAlpha\_A$, $ETA\_dAlpha\_B$ & Delta-alpha ETA descriptors (A and B variants). \\
  & $ETA\_epsilon\_1$--$5$ & ETA epsilon descriptors (five variants). \\
  & $ETA\_dEpsilon\_A$--$D$ & Delta-epsilon ETA descriptors. \\
  & $ETA\_dBeta$, $AETA\_dBeta$ & Delta-beta and average delta-beta. \\
  & $ETA\_psi\_1$, $ETA\_dPsi\_A$, $ETA\_dPsi\_B$ & ETA psi and delta-psi descriptors. \\ \hline
\end{tabular}
\end{table}


% USED FEATURES:

%ABC,ABCGG,nAcid,nBase,SpAbs_A,SpMax_A,SpDiam_A,SpAD_A,SpMAD_A,LogEE_A,VE1_A,VE2_A,VE3_A,VR1_A,VR2_A,VR3_A,nAromAtom,nAromBond,nAtom,nHeavyAtom,nSpiro,nBridgehead,nHetero,nH,nB,nC,nN,nO,nS,nP,nF,nCl,nBr,nI,nX,ATS0dv,ATS1dv,ATS2dv,ATS3dv,ATS4dv,ATS5dv,ATS6dv,ATS7dv,ATS8dv,ATS0d,ATS1d,ATS2d,ATS3d,ATS4d,ATS5d,ATS6d,ATS7d,ATS8d,ATS0s,ATS1s,ATS2s,ATS3s,ATS4s,ATS5s,ATS6s,ATS7s,ATS8s,ATS0Z,ATS1Z,ATS2Z,ATS3Z,ATS4Z,ATS5Z,ATS6Z,ATS7Z,ATS8Z,ATS0m,ATS1m,ATS2m,ATS3m,ATS4m,ATS5m,ATS6m,ATS7m,ATS8m,ATS0v,ATS1v,ATS2v,ATS3v,ATS4v,ATS5v,ATS6v,ATS7v,ATS8v,ATS0se,ATS1se,ATS2se,ATS3se,ATS4se,ATS5se,ATS6se,ATS7se,ATS8se,ATS0pe,ATS1pe,ATS2pe,ATS3pe,ATS4pe,ATS5pe,ATS6pe,ATS7pe,ATS8pe,ATS0are,ATS1are,ATS2are,ATS3are,ATS4are,ATS5are,ATS6are,ATS7are,ATS8are,ATS0p,ATS1p,ATS2p,ATS3p,ATS4p,ATS5p,ATS6p,ATS7p,ATS8p,ATS0i,ATS1i,ATS2i,ATS3i,ATS4i,ATS5i,ATS6i,ATS7i,ATS8i,AATS0dv,AATS1dv,AATS2dv,AATS3dv,AATS4dv,AATS5dv,AATS6dv,AATS7dv,AATS8dv,AATS0d,AATS1d,AATS2d,AATS3d,AATS4d,AATS5d,AATS6d,AATS7d,AATS8d,AATS0s,AATS1s,AATS2s,AATS3s,AATS4s,AATS5s,AATS6s,AATS7s,AATS8s,AATS0Z,AATS1Z,AATS2Z,AATS3Z,AATS4Z,AATS5Z,AATS6Z,AATS7Z,AATS8Z,AATS0m,AATS1m,AATS2m,AATS3m,AATS4m,AATS5m,AATS6m,AATS7m,AATS8m,AATS0v,AATS1v,AATS2v,AATS3v,AATS4v,AATS5v,AATS6v,AATS7v,AATS8v,AATS0se,AATS1se,AATS2se,AATS3se,AATS4se,AATS5se,AATS6se,AATS7se,AATS8se,AATS0pe,AATS1pe,AATS2pe,AATS3pe,AATS4pe,AATS5pe,AATS6pe,AATS7pe,AATS8pe,AATS0are,AATS1are,AATS2are,AATS3are,AATS4are,AATS5are,AATS6are,AATS7are,AATS8are,AATS0p,AATS1p,AATS2p,AATS3p,AATS4p,AATS5p,AATS6p,AATS7p,AATS8p,AATS0i,AATS1i,AATS2i,AATS3i,AATS4i,AATS5i,AATS6i,AATS7i,AATS8i,ATSC0c,ATSC1c,ATSC2c,ATSC3c,ATSC4c,ATSC5c,ATSC6c,ATSC7c,ATSC8c,ATSC0dv,ATSC1dv,ATSC2dv,ATSC3dv,ATSC4dv,ATSC5dv,ATSC6dv,ATSC7dv,ATSC8dv,ATSC0d,ATSC1d,ATSC2d,ATSC3d,ATSC4d,ATSC5d,ATSC6d,ATSC7d,ATSC8d,ATSC0s,ATSC1s,ATSC2s,ATSC3s,ATSC4s,ATSC5s,ATSC6s,ATSC7s,ATSC8s,ATSC0Z,ATSC1Z,ATSC2Z,ATSC3Z,ATSC4Z,ATSC5Z,ATSC6Z,ATSC7Z,ATSC8Z,ATSC0m,ATSC1m,ATSC2m,ATSC3m,ATSC4m,ATSC5m,ATSC6m,ATSC7m,ATSC8m,ATSC0v,ATSC1v,ATSC2v,ATSC3v
%,ATSC4v,ATSC5v,ATSC6v,ATSC7v,ATSC8v,ATSC0se,ATSC1se,ATSC2se,ATSC3se,ATSC4se,ATSC5se,ATSC6se,ATSC7se,ATSC8se,ATSC0pe,ATSC1pe,ATSC2pe,ATSC3pe,ATSC4pe,ATSC5pe,ATSC6pe,ATSC7pe,ATSC8pe,ATSC0are,ATSC1are,ATSC2are,ATSC3are,ATSC4are,ATSC5are,ATSC6are,ATSC7are,ATSC8are,ATSC0p,ATSC1p,ATSC2p,ATSC3p,ATSC4p,ATSC5p,ATSC6p,ATSC7p,ATSC8p,ATSC0i,ATSC1i,ATSC2i,ATSC3i,ATSC4i,ATSC5i,ATSC6i,ATSC7i,ATSC8i,AATSC0c,AATSC1c,AATSC2c,AATSC3c,AATSC4c,AATSC5c,AATSC6c,AATSC7c,AATSC8c,AATSC0dv,AATSC1dv,AATSC2dv,AATSC3dv,AATSC4dv,AATSC5dv,AATSC6dv,AATSC7dv,AATSC8dv,AATSC0d,AATSC1d,AATSC2d,AATSC3d,AATSC4d,AATSC5d,AATSC6d,AATSC7d,AATSC8d,AATSC0s,AATSC1s,AATSC2s,AATSC3s,AATSC4s,AATSC5s,AATSC6s,AATSC7s,AATSC8s,AATSC0Z,AATSC1Z,AATSC2Z,AATSC3Z,AATSC4Z,AATSC5Z,AATSC6Z,AATSC7Z,AATSC8Z,AATSC0m,AATSC1m,AATSC2m,AATSC3m,AATSC4m,AATSC5m,AATSC6m,AATSC7m,AATSC8m,AATSC0v,AATSC1v,AATSC2v,AATSC3v,AATSC4v,AATSC5v,AATSC6v,AATSC7v,AATSC8v,AATSC0se,AATSC1se,AATSC2se,AATSC3se,AATSC4se,AATSC5se,AATSC6se,AATSC7se,AATSC8se,AATSC0pe,AATSC1pe,AATSC2pe,AATSC3pe,AATSC4pe,AATSC5pe,AATSC6pe,AATSC7pe,AATSC8pe,AATSC0are,AATSC1are,AATSC2are,AATSC3are,AATSC4are,AATSC5are,AATSC6are,AATSC7are,AATSC8are,AATSC0p,AATSC1p,AATSC2p,AATSC3p,AATSC4p,AATSC5p,AATSC6p,AATSC7p,AATSC8p,AATSC0i,AATSC1i,AATSC2i,AATSC3i,AATSC4i,AATSC5i,AATSC6i,AATSC7i,AATSC8i,MATS1c,MATS2c,MATS3c,MATS4c,MATS5c,MATS6c,MATS7c,MATS8c,MATS1dv,MATS2dv,MATS3dv,MATS4dv,MATS5dv,MATS6dv,MATS7dv,MATS8dv,MATS1d,MATS2d,MATS3d,MATS4d,MATS5d,MATS6d,MATS7d,MATS8d,MATS1s,MATS2s,MATS3s,MATS4s,MATS5s,MATS6s,MATS7s,MATS8s,MATS1Z,MATS2Z,MATS3Z,MATS4Z,MATS5Z,MATS6Z,MATS7Z,MATS8Z,MATS1m,MATS2m,MATS3m,MATS4m,MATS5m,MATS6m,MATS7m,MATS8m,MATS1v,MATS2v,MATS3v,MATS4v,MATS5v,MATS6v,MATS7v,MATS8v,MATS1se,MATS2se,MATS3se,MATS4se,MATS5se,MATS6se,MATS7se,MATS8se,MATS1pe,MATS2pe,MATS3pe,MATS4pe,MATS5pe,MATS6pe,MATS7pe,MATS8pe,MATS1are,MATS2are,MATS3are,MATS4are,MATS5are,MATS6are,MATS7are,MATS8are,MATS1p,MATS2p,MATS3p,MATS4p,MATS5p,MATS6p,MATS7p,MATS8p,MATS1i,MATS2i,MATS3i,MATS4i,MATS5i,MATS6i,MATS7i
%,MATS8i,GATS1c,GATS2c,GATS3c,GATS4c,GATS5c,GATS6c,GATS7c,GATS8c,GATS1dv,GATS2dv,GATS3dv,GATS4dv,GATS5dv,GATS6dv,GATS7dv,GATS8dv,GATS1d,GATS2d,GATS3d,GATS4d,GATS5d,GATS6d,GATS7d,GATS8d,GATS1s,GATS2s,GATS3s,GATS4s,GATS5s,GATS6s,GATS7s,GATS8s,GATS1Z,GATS2Z,GATS3Z,GATS4Z,GATS5Z,GATS6Z,GATS7Z,GATS8Z,GATS1m,GATS2m,GATS3m,GATS4m,GATS5m,GATS6m,GATS7m,GATS8m,GATS1v,GATS2v,GATS3v,GATS4v,GATS5v,GATS6v,GATS7v,GATS8v,GATS1se,GATS2se,GATS3se,GATS4se,GATS5se,GATS6se,GATS7se,GATS8se,GATS1pe,GATS2pe,GATS3pe,GATS4pe,GATS5pe,GATS6pe,GATS7pe,GATS8pe,GATS1are,GATS2are,GATS3are,GATS4are,GATS5are,GATS6are,GATS7are,GATS8are,GATS1p,GATS2p,GATS3p,GATS4p,GATS5p,GATS6p,GATS7p,GATS8p,GATS1i,GATS2i,GATS3i,GATS4i,GATS5i,GATS6i,GATS7i,GATS8i,BCUTc-1h,BCUTc-1l,BCUTdv-1h,BCUTdv-1l,BCUTd-1h,BCUTd-1l,BCUTs-1h,BCUTs-1l,BCUTZ-1h,BCUTZ-1l,BCUTm-1h,BCUTm-1l,BCUTv-1h,BCUTv-1l,BCUTse-1h,BCUTse-1l,BCUTpe-1h,BCUTpe-1l,BCUTare-1h,BCUTare-1l,BCUTp-1h,BCUTp-1l,BCUTi-1h,BCUTi-1l,BalabanJ,SpAbs_DzZ,SpMax_DzZ,SpDiam_DzZ,SpAD_DzZ,SpMAD_DzZ,LogEE_DzZ,SM1_DzZ,VE1_DzZ,VE2_DzZ,VE3_DzZ,VR1_DzZ,VR2_DzZ,VR3_DzZ,SpAbs_Dzm,SpMax_Dzm,SpDiam_Dzm,SpAD_Dzm,SpMAD_Dzm,LogEE_Dzm,SM1_Dzm,VE1_Dzm,VE2_Dzm,VE3_Dzm,VR1_Dzm,VR2_Dzm,VR3_Dzm,SpAbs_Dzv,SpMax_Dzv,SpDiam_Dzv,SpAD_Dzv,SpMAD_Dzv,LogEE_Dzv,SM1_Dzv,VE1_Dzv,VE2_Dzv,VE3_Dzv,VR1_Dzv,VR2_Dzv,VR3_Dzv,SpAbs_Dzse,SpMax_Dzse,SpDiam_Dzse,SpAD_Dzse,SpMAD_Dzse,LogEE_Dzse,SM1_Dzse,VE1_Dzse,VE2_Dzse,VE3_Dzse,VR1_Dzse,VR2_Dzse,VR3_Dzse,SpAbs_Dzpe,SpMax_Dzpe,SpDiam_Dzpe,SpAD_Dzpe,SpMAD_Dzpe,LogEE_Dzpe,SM1_Dzpe,VE1_Dzpe,VE2_Dzpe,VE3_Dzpe,VR1_Dzpe,VR2_Dzpe,VR3_Dzpe,SpAbs_Dzare,SpMax_Dzare,SpDiam_Dzare,SpAD_Dzare,SpMAD_Dzare,LogEE_Dzare,SM1_Dzare,VE1_Dzare,VE2_Dzare,VE3_Dzare,VR1_Dzare,VR2_Dzare,VR3_Dzare,SpAbs_Dzp,SpMax_Dzp,SpDiam_Dzp,SpAD_Dzp,SpMAD_Dzp,LogEE_Dzp,SM1_Dzp,VE1_Dzp,VE2_Dzp,VE3_Dzp,VR1_Dzp,VR2_Dzp,VR3_Dzp,SpAbs_Dzi,SpMax_Dzi,SpDiam_Dzi,SpAD_Dzi,SpMAD_Dzi,LogEE_Dzi,SM1_Dzi,VE1_Dzi,VE2_Dzi,VE3_Dzi,VR1_Dzi,VR2_Dzi,VR3_Dzi,BertzCT,nBonds,nBondsO,nBondsS,nBondsD,nBondsT,nBondsA,nBondsM
%,nBondsKS,nBondsKD,RNCG,RPCG,C1SP1,C2SP1,C1SP2,C2SP2,C3SP2,C1SP3,C2SP3,C3SP3,C4SP3,HybRatio,FCSP3,Xch-3d,Xch-4d,Xch-5d,Xch-6d,Xch-7d,Xch-3dv,Xch-4dv,Xch-5dv,Xch-6dv,Xch-7dv,Xc-3d,Xc-4d,Xc-5d,Xc-6d,Xc-3dv,Xc-4dv,Xc-5dv,Xc-6dv,Xpc-4d,Xpc-5d,Xpc-6d,Xpc-4dv,Xpc-5dv,Xpc-6dv,Xp-0d,Xp-1d,Xp-2d,Xp-3d,Xp-4d,Xp-5d,Xp-6d,Xp-7d,AXp-0d,AXp-1d,AXp-2d,AXp-3d,AXp-4d,AXp-5d,AXp-6d,AXp-7d,Xp-0dv,Xp-1dv,Xp-2dv,Xp-3dv,Xp-4dv,Xp-5dv,Xp-6dv,Xp-7dv,AXp-0dv,AXp-1dv,AXp-2dv,AXp-3dv,AXp-4dv,AXp-5dv,AXp-6dv,AXp-7dv,SZ,Sm,Sv,Sse,Spe,Sare,Sp,Si,MZ,Mm,Mv,Mse,Mpe,Mare,Mp,Mi,SpAbs_Dt,SpMax_Dt,SpDiam_Dt,SpAD_Dt,SpMAD_Dt,LogEE_Dt,SM1_Dt,VE1_Dt,VE2_Dt,VE3_Dt,VR1_Dt,VR2_Dt,VR3_Dt,DetourIndex,SpAbs_D,SpMax_D,SpDiam_D,SpAD_D,SpMAD_D,LogEE_D,VE1_D,VE2_D,VE3_D,VR1_D,VR2_D,VR3_D,NsLi,NssBe,NssssBe,NssBH,NsssB,NssssB,NsCH3,NdCH2,NssCH2,NtCH,NdsCH,NaaCH,NsssCH,NddC,NtsC,NdssC,NaasC,NaaaC,NssssC,NsNH3,NsNH2,NssNH2,NdNH,NssNH,NaaNH,NtN,NsssNH,NdsN,NaaN,NsssN,NddsN,NaasN,NssssN,NsOH,NdO,NssO,NaaO,NsF,NsSiH3,NssSiH2,NsssSiH,NssssSi,NsPH2,NssPH,NsssP,NdsssP,NsssssP,NsSH,NdS,NssS,NaaS,NdssS,NddssS,NsCl,NsGeH3,NssGeH2,NsssGeH,NssssGe,NsAsH2,NssAsH,NsssAs,NsssdAs,NsssssAs,NsSeH,NdSe,NssSe,NaaSe,NdssSe,NddssSe,NsBr,NsSnH3,NssSnH2,NsssSnH,NssssSn,NsI,NsPbH3,NssPbH2,NsssPbH,NssssPb,SsLi,SssBe,SssssBe,SssBH,SsssB,SssssB,SsCH3,SdCH2,SssCH2,StCH,SdsCH,SaaCH,SsssCH,SddC,StsC,SdssC,SaasC,SaaaC,SssssC,SsNH3,SsNH2,SssNH2,SdNH,SssNH,SaaNH,StN,SsssNH,SdsN,SaaN,SsssN,SddsN,SaasN,SssssN,SsOH,SdO,SssO,SaaO,SsF,SsSiH3,SssSiH2,SsssSiH,SssssSi,SsPH2,SssPH,SsssP,SdsssP,SsssssP,SsSH,SdS,SssS,SaaS,SdssS,SddssS,SsCl,SsGeH3,SssGeH2,SsssGeH,SssssGe,SsAsH2,SssAsH,SsssAs,SsssdAs,SsssssAs,SsSeH,SdSe,SssSe,SaaSe,SdssSe,SddssSe,SsBr,SsSnH3,SssSnH2,SsssSnH,SssssSn,SsI,SsPbH3,SssPbH2,SsssPbH,SssssPb,MAXsLi,MAXssBe,MAXssssBe,MAXssBH,MAXsssB,MAXssssB,MAXsCH3,MAXdCH2,MAXssCH2,MAXtCH,MAXdsCH,MAXaaCH,MAXsssCH,MAXddC,MAXtsC,MAXdssC,MAXaasC,MAXaaaC,MAXssssC,MAXsNH3,MAXsNH2,MAXssNH2,MAXdNH,MAXssNH,MAXaaNH,MAXtN,MAXsssNH,MAXdsN,MAXaaN,MAXsssN,MAXddsN,MAXaasN
,MAXssssN,MAXsOH,MAXdO,MAXssO,MAXaaO,MAXsF,MAXsSiH3,MAXssSiH2,MAXsssSiH,MAXssssSi,MAXsPH2,MAXssPH,MAXsssP,MAXdsssP,MAXsssssP,MAXsSH,MAXdS,MAXssS,MAXaaS,MAXdssS,MAXddssS,MAXsCl,MAXsGeH3,MAXssGeH2,MAXsssGeH,MAXssssGe,MAXsAsH2,MAXssAsH,MAXsssAs,MAXsssdAs,MAXsssssAs,MAXsSeH,MAXdSe,MAXssSe,MAXaaSe,MAXdssSe,MAXddssSe,MAXsBr,MAXsSnH3,MAXssSnH2,MAXsssSnH,MAXssssSn,MAXsI,MAXsPbH3,MAXssPbH2,MAXsssPbH,MAXssssPb,MINsLi,MINssBe,MINssssBe,MINssBH,MINsssB,MINssssB,MINsCH3,MINdCH2,MINssCH2,MINtCH,MINdsCH,MINaaCH,MINsssCH,MINddC,MINtsC,MINdssC,MINaasC,MINaaaC,MINssssC,MINsNH3,MINsNH2,MINssNH2,MINdNH,MINssNH,MINaaNH,MINtN,MINsssNH,MINdsN,MINaaN,MINsssN,MINddsN,MINaasN,MINssssN,MINsOH,MINdO,MINssO,MINaaO,MINsF,MINsSiH3,MINssSiH2,MINsssSiH,MINssssSi,MINsPH2,MINssPH,MINsssP,MINdsssP,MINsssssP,MINsSH,MINdS,MINssS,MINaaS,MINdssS,MINddssS,MINsCl,MINsGeH3,MINssGeH2,MINsssGeH,MINssssGe,MINsAsH2,MINssAsH,MINsssAs,MINsssdAs,MINsssssAs,MINsSeH,MINdSe,MINssSe,MINaaSe,MINdssSe,MINddssSe,MINsBr,MINsSnH3,MINssSnH2,MINsssSnH,MINssssSn,MINsI,MINsPbH3,MINssPbH2,MINsssPbH,MINssssPb,ECIndex,ETA_alpha,AETA_alpha,ETA_shape_p,ETA_shape_y,ETA_shape_x,ETA_beta,AETA_beta,ETA_beta_s,AETA_beta_s,ETA_beta_ns,AETA_beta_ns,ETA_beta_ns_d,AETA_beta_ns_d,ETA_eta,AETA_eta,ETA_eta_L,AETA_eta_L,ETA_eta_R,AETA_eta_R,ETA_eta_RL,AETA_eta_RL,ETA_eta_F,AETA_eta_F,ETA_eta_FL,AETA_eta_FL,ETA_eta_B,AETA_eta_B,ETA_eta_BR,AETA_eta_BR,ETA_dAlpha_A,ETA_dAlpha_B,ETA_epsilon_1,ETA_epsilon_2,ETA_epsilon_3,ETA_epsilon_4,ETA_epsilon_5,ETA_dEpsilon_A,ETA_dEpsilon_B,ETA_dEpsilon_C,ETA_dEpsilon_D,ETA_dBeta,AETA_dBeta,ETA_psi_1,ETA_dPsi_A,ETA_dPsi_B,fragCpx,fMF,nHBAcc,nHBDon,IC0,IC1,IC2,IC3,IC4,IC5,TIC0,TIC1,TIC2,TIC3,TIC4,TIC5,SIC0,SIC1,SIC2,SIC3,SIC4,SIC5,BIC0,BIC1,BIC2,BIC3,BIC4,BIC5,CIC0,CIC1,CIC2,CIC3,CIC4,CIC5,MIC0,MIC1,MIC2,MIC3,MIC4,MIC5,ZMIC0,ZMIC1,ZMIC2,ZMIC3,ZMIC4,ZMIC5,Kier1,Kier2,Kier3,FilterItLogS,VMcGowan,LabuteASA,PEOE_VSA1,PEOE_VSA2,PEOE_VSA3,PEOE_VSA4,PEOE_VSA5,PEOE_VSA6,PEOE_VSA7,PEOE_VSA8,PEOE_VSA9,PEOE_VSA10,PEOE_VSA11,
PEOE_VSA12,PEOE_VSA13,SMR_VSA1,SMR_VSA2,SMR_VSA3,SMR_VSA4,SMR_VSA5,SMR_VSA6,SMR_VSA7,SMR_VSA8,SMR_VSA9,SlogP_VSA1,SlogP_VSA2,SlogP_VSA3,SlogP_VSA4,SlogP_VSA5,SlogP_VSA6,SlogP_VSA7,SlogP_VSA8,SlogP_VSA9,SlogP_VSA10,SlogP_VSA11,EState_VSA1,EState_VSA2,EState_VSA3,EState_VSA4,EState_VSA5,EState_VSA6,EState_VSA7,EState_VSA8,EState_VSA9,EState_VSA10,VSA_EState1,VSA_EState2,VSA_EState3,VSA_EState4,VSA_EState5,VSA_EState6,VSA_EState7,VSA_EState8,VSA_EState9,MDEC-11,MDEC-12,MDEC-13,MDEC-14,MDEC-22,MDEC-23,MDEC-24,MDEC-33,MDEC-34,MDEC-44,MDEO-11,MDEO-12,MDEO-22,MDEN-11,MDEN-12,MDEN-13,MDEN-22,MDEN-23,MDEN-33,MID,AMID,MID_h,AMID_h,MID_C,AMID_C,MID_N,AMID_N,MID_O,AMID_O,MID_X,AMID_X,MPC2,MPC3,MPC4,MPC5,MPC6,MPC7,MPC8,MPC9,MPC10,TMPC10,piPC1,piPC2,piPC3,piPC4,piPC5,piPC6,piPC7,piPC8,piPC9,piPC10,TpiPC10,apol,bpol,nRing,n3Ring,n4Ring,n5Ring,n6Ring,n7Ring,n8Ring,n9Ring,n10Ring,n11Ring,n12Ring,nG12Ring,nHRing,n3HRing,n4HRing,n5HRing,n6HRing,n7HRing,n8HRing,n9HRing,n10HRing,n11HRing,n12HRing,nG12HRing,naRing,n3aRing,n4aRing,n5aRing,n6aRing,n7aRing,n8aRing,n9aRing,n10aRing,n11aRing,n12aRing,nG12aRing,naHRing,n3aHRing,n4aHRing,n5aHRing,n6aHRing,n7aHRing,n8aHRing,n9aHRing,n10aHRing,n11aHRing,n12aHRing,nG12aHRing,nARing,n3ARing,n4ARing,n5ARing,n6ARing,n7ARing,n8ARing,n9ARing,n10ARing,n11ARing,n12ARing,nG12ARing,nAHRing,n3AHRing,n4AHRing,n5AHRing,n6AHRing,n7AHRing,n8AHRing,n9AHRing,n10AHRing,n11AHRing,n12AHRing,nG12AHRing,nFRing,n4FRing,n5FRing,n6FRing,n7FRing,n8FRing,n9FRing,n10FRing,n11FRing,n12FRing,nG12FRing,nFHRing,n4FHRing,n5FHRing,n6FHRing,n7FHRing,n8FHRing,n9FHRing,n10FHRing,n11FHRing,n12FHRing,nG12FHRing,nFaRing,n4FaRing,n5FaRing,n6FaRing,n7FaRing,n8FaRing,n9FaRing,n10FaRing,n11FaRing,n12FaRing,nG12FaRing,nFaHRing,n4FaHRing,n5FaHRing,n6FaHRing,n7FaHRing,n8FaHRing,n9FaHRing,n10FaHRing,n11FaHRing,n12FaHRing,nG12FaHRing,nFARing,n4FARing,n5FARing,n6FARing,n7FARing,n8FARing,n9FARing,n10FARing,n11FARing,n12FARing,nG12FARing,nFAHRing,n4FAHRing,n5FAHRing,n6FAHRing,n7FAHRing,n8FAHRing,n%
9FAHRing,n10FAHRing,n11FAHRing,n12FAHRing,nG12FAHRing,nRot,RotRatio,SLogP,SMR,TopoPSA(NO),TopoPSA,GGI1,GGI2,GGI3,GGI4,GGI5,GGI6,GGI7,GGI8,GGI9,GGI10,JGI1,JGI2,JGI3,JGI4,JGI5,JGI6,JGI7,JGI8,JGI9,JGI10,JGT10,Diameter,Radius,TopoShapeIndex,PetitjeanIndex,Vabc,VAdjMat,MWC01,MWC02,MWC03,MWC04,MWC05,MWC06,MWC07,MWC08,MWC09,MWC10,TMWC10,SRW02,SRW03,SRW04,SRW05,SRW06,SRW07,SRW08,SRW09,SRW10,TSRW10,MW,AMW,WPath,WPol,Zagreb1,Zagreb2,mZagreb1,mZagreb2

\clearpage
\bibliography{bibliography-prescreening}

\end{document}
