Monday, September 14, 2020

Interactively exploring ScaffoldNetwork options

The ScafffoldNetwork code added in RDKit release 2020.03 (described here: https://doi.org/10.1021/acs.jcim.0c00296) has a lot of options. In this blog post I'll show how to use an interactive view based on ipycytoscape to explore the scaffold networks and look at the effects of the various options. There's not a lot of text here since this is mainly about providing the interactive demo. Of course it doesn't work in blogger, so you'll need to download the notebook from the github repo to try it yourself.

This draws on a blog post from pen as well as some of the examples/tutorials provided by Mariana Meireles and the rest of the ipycytoscape team.

In [1]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
IPythonConsole.drawOptions.maxFontSize=18
from rdkit.Chem import rdDepictor
from urllib import parse
from rdkit.Chem.Draw import rdMolDraw2D
import gzip
import time
import rdkit
import logging
from rdkit import RDLogger
from rdkit.Chem.Scaffolds import rdScaffoldNetwork

print(rdkit.__version__)
%load_ext sql
%pylab inline
2020.09.1dev1
Populating the interactive namespace from numpy and matplotlib

The dataset I'll use here is the set of "very active" molecules from ChEMBL that I put together for an earlier blog post. Start by reading that in and making sure we have unique SMILES:

In [2]:
moldict = dict((Chem.MolToSmiles(x),x) for x in \
               Chem.ForwardSDMolSupplier(gzip.open('../data/chembl26_very_active.sdf.gz')) if x.GetNumAtoms()<75)
len(moldict)
Out[2]:
22626
In [3]:
ms = list(moldict.values())
In [4]:
from rdkit.Chem.Draw import rdMolDraw2D
from urllib import parse

# some code lightly adapted from pen: 
# https://iwatobipen.wordpress.com/2020/03/30/draw-scaffold-tree-as-network-with-molecular-image-rdkit-cytoscape/
def smi2svg(smi):
    mol = Chem.MolFromSmiles(smi)
    try:
        Chem.rdmolops.Kekulize(mol)
    except:
        pass
    drawer = rdMolDraw2D.MolDraw2DSVG(350, 300)
    rdMolDraw2D.PrepareAndDrawMolecule(drawer,mol)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText()
    return svg
  
def smi2image(smi):
    svg_string = smi2svg(smi)
    impath = 'data:image/svg+xml;charset=utf-8,' + parse.quote(svg_string, safe="")
    return impath
In [26]:
import networkx as nx
import ipycytoscape
import ipywidgets as widgets
from ipywidgets import interact,fixed


tms = ms[:50]
def show_scaffolds(mols,which=None,layout='dagre',includeGenericScaffolds=False,includeGenericBondScaffolds=False,
                   includeScaffoldsWithAttachments=True,includeScaffoldsWithoutAttachments=False,
                   pruneBeforeFragmenting=True,
                  keepOnlyFirstFragment=True):
    # the scaffold generation process is verbose, disable the messages sent to the info log:
    RDLogger.DisableLog('rdApp.info')

    if which is not None:
        mols = [mols[which]]
    scaffParams = rdScaffoldNetwork.ScaffoldNetworkParams()
    scaffParams.collectMolCounts = True
    scaffParams.includeGenericScaffolds = includeGenericScaffolds
    scaffParams.includeScaffoldsWithoutAttachments = includeScaffoldsWithoutAttachments
    scaffParams.keepOnlyFirstFragment = keepOnlyFirstFragment
    scaffParams.includeGenericBondScaffolds = includeGenericBondScaffolds
    scaffParams.includeScaffoldsWithAttachments = includeScaffoldsWithAttachments
    scaffParams.pruneBeforeFragmenting = pruneBeforeFragmenting
    net = rdScaffoldNetwork.CreateScaffoldNetwork(mols,scaffParams)

    g = nx.graph.Graph()
    for idx, node in enumerate(net.nodes):
        g.add_node(idx, count=f'count: {net.molCounts[idx]}', smiles=node, img=smi2image(node), hac=Chem.MolFromSmiles(node).GetNumAtoms())
    g.add_edges_from([(e.beginIdx,e.endIdx) for e in net.edges])
    
    directed = ipycytoscape.CytoscapeWidget()
    directed.set_style([{
                            'selector': 'node',
                            'css': {
                                'content': 'data(count)',
                                'text-valign': 'top',
                                'color': 'white',
                                'text-outline-width': 2,
                                'text-outline-color': '#9dbaea',
                                'shape' : 'rectangle',
                                'width':350,
                                'height':300,
                                'background-color': '#9dbaea',
                                'background-fit':'contain',
                                'background-image':'data(img)',

                            }
                            },
                            {
                            'selector': 'edge',
                            'style': {
                                'width': 4,
                                'line-color': '#9dbaea',
                                'target-arrow-shape': 'triangle',
                                'target-arrow-color': '#9dbaea',
                                'curve-style': 'bezier'
                            }
                            },
                            {
                            'selector': ':selected',
                            'css': {
                                'background-color': 'black',
                                'line-color': 'black',
                                'target-arrow-color': 'black',
                                'source-arrow-color': 'black',
                                'text-outline-color': 'black'
                            }}
                            ])
    directed.set_layout(name=layout, nodeSpacing=50, edgeLengthVal=50)
    directed.graph.add_graph_from_networkx(g, directed=True)
    return directed


@interact(mols=fixed(tms),which=range(0,len(tms)),layout=['dagre','breadthfirst','concentric','cose'])
def interactively_show_scaffolds(mols,which=None,layout='dagre',includeGenericScaffolds=False,includeGenericBondScaffolds=False,
                   includeScaffoldsWithAttachments=True,includeScaffoldsWithoutAttachments=False,
                   pruneBeforeFragmenting=True,
                  keepOnlyFirstFragment=True):
    # the scaffold generation process is verbose, disable the messages sent to the info log:
    RDLogger.DisableLog('rdApp.info')

    if which is None:
        return
    return show_scaffolds(mols,which=which,layout=layout,includeGenericScaffolds=includeGenericScaffolds,
                          includeGenericBondScaffolds=includeGenericBondScaffolds,
                          includeScaffoldsWithAttachments=includeScaffoldsWithAttachments,
                         includeScaffoldsWithoutAttachments=includeScaffoldsWithoutAttachments,
                         pruneBeforeFragmenting=pruneBeforeFragmenting,keepOnlyFirstFragment=keepOnlyFirstFragment)
Here's what it looks like in action: