When I try to featurize the list of molecules using MolGraphConv featurizer, I get the exception mentioned below and it keeps running indefinitely.
I try to upsample the existing molecules by generating conformers and then I featurize these molecules contained in the list mols.
Code:
def upsample(data):
ps = rdDistGeom.ETKDGv3()
ps.randomSeed = 0xf00d
mols = []
labels = []
for i in range(len(data.iloc[0:3,:])):
print(i)
sm = data.iloc[i][‘smiles’]
status = data.iloc[i][‘status’]
m = Chem.MolFromSmiles(sm)
m1=Chem.AddHs(m)
bounds = rdDistGeom.GetMoleculeBoundsMatrix(m1)
ps.SetBoundsMat(bounds)
if status == 1:
cids = AllChem.EmbedMultipleConfs(m1,8,ps)
else:
cids = AllChem.EmbedMultipleConfs(m1, 4,ps)
print(status,len(cids))
mols.append(m)
labels.append(status)
for j in range(len(cids)):
mol = m1.GetConformer(cids[j])
m2 = mol.GetOwningMol()
Chem.SanitizeMol(m2)
mols.append(m2)
labels.append(status)
return mols,labels
mols,labels = upsample(df_train)
feat = dc.feat.MolGraphConvFeaturizer()
features = feat.featurize(mols)
print(features)
Error:
Failed to featurize datapoint 10, *. Appending empty array
Exception message: Pre-condition Violation
RingInfo not initialized
Violation occurred on line 35 in file Code/GraphMol/RingInfo.cpp
Failed Expression: df_init
RDKIT: 2020.09.1
BOOST: 1_74