-- -- first, define the datatype. Turn off echoing so that expected file -- does not depend on contents of rdkit.sql. -- SET client_min_messages = warning; \set ECHO none RESET client_min_messages; SELECT is_valid_smiles('c1ccccc1'); is_valid_smiles ----------------- t (1 row) SELECT is_valid_smiles('c1ccccc'); is_valid_smiles ----------------- f (1 row) SELECT is_valid_smiles('c1cccn1'); is_valid_smiles ----------------- f (1 row) SELECT is_valid_smarts('c1ccc[n,c]1'); is_valid_smarts ----------------- t (1 row) CREATE TABLE pgmol (id int, m mol); \copy pgmol from 'data/data' CREATE UNIQUE INDEX mol_ididx ON pgmol (id); SELECT count(*) FROM pgmol; count ------- 1000 (1 row) SELECT count(*) FROM pgmol WHERE m @> 'c1ccccc1'; count ------- 901 (1 row) SELECT count(*) FROM pgmol WHERE m @> 'c1cccnc1'; count ------- 245 (1 row) SELECT count(*) FROM pgmol WHERE 'c1ccccc1' <@ m; count ------- 901 (1 row) SELECT count(*) FROM pgmol WHERE 'c1cccnc1' <@ m; count ------- 245 (1 row) SELECT count(*) FROM pgmol WHERE m @> mol_from_smarts('c1ccccc1'); count ------- 901 (1 row) SELECT count(*) FROM pgmol WHERE m @> mol_from_smarts('c1cccnc1'); count ------- 245 (1 row) SELECT count(*) FROM pgmol WHERE m @> mol_from_smarts('c1ccc[n,c]c1'); count ------- 939 (1 row) SELECT count(*) FROM pgmol WHERE mol_from_smarts('c1ccccc1') <@ m; count ------- 901 (1 row) SELECT count(*) FROM pgmol WHERE mol_from_smarts('c1ccc[n,c]c1') <@ m; count ------- 939 (1 row) SELECT id, rdkit_fp(m) AS f INTO pgbfp FROM pgmol; CREATE UNIQUE INDEX bfp_ididx ON pgbfp (id); SELECT id, morgan_fp(m,1) AS f INTO pgsfp FROM pgmol; CREATE UNIQUE INDEX sfp_ididx ON pgsfp (id); SELECT id, torsion_fp(m) AS f INTO pgtorsfp FROM pgmol; SELECT id, atompair_fp(m) AS f INTO pgpairfp FROM pgmol; set rdkit.tanimoto_threshold=0.5; set rdkit.dice_threshold=0.5; SELECT id, tanimoto_sml(rdkit_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol), f) FROM (SELECT * FROM pgbfp ORDER BY id) AS t WHERE rdkit_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol) % f LIMIT 10; id | tanimoto_sml ----+-------------- (0 rows) SELECT id, dice_sml(rdkit_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol), f) FROM (SELECT * FROM pgbfp ORDER BY id) AS t WHERE rdkit_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol) % f LIMIT 10; id | dice_sml ----+---------- (0 rows) SELECT id, tanimoto_sml(rdkit_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol), f) FROM (SELECT * FROM pgbfp ORDER BY id) AS t WHERE rdkit_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol) # f LIMIT 10; id | tanimoto_sml --------+------------------- 66722 | 0.360103626943005 498250 | 0.368298368298368 576770 | 0.353684210526316 644427 | 0.337016574585635 645921 | 0.365942028985507 690546 | 0.402 698576 | 0.422657952069717 714484 | 0.383073496659243 771595 | 0.343936381709742 788060 | 0.333865814696486 (10 rows) SELECT id, dice_sml(rdkit_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol), f), size(f) FROM (SELECT * FROM pgbfp ORDER BY id) AS t WHERE rdkit_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol) # f LIMIT 10; id | dice_sml | size --------+-------------------+------ 66722 | 0.52952380952381 | 1024 498250 | 0.538330494037479 | 1024 576770 | 0.522550544323484 | 1024 644427 | 0.504132231404959 | 1024 645921 | 0.535809018567639 | 1024 690546 | 0.573466476462197 | 1024 698576 | 0.594180704441041 | 1024 714484 | 0.553945249597424 | 1024 771595 | 0.511834319526627 | 1024 788060 | 0.50059880239521 | 1024 (10 rows) set rdkit.tanimoto_threshold=0.4; SELECT id, tanimoto_sml(morgan_fp('C1C(OC2=CC(=CC(=C2C1=O)))'::mol, 1), f) FROM (SELECT * FROM pgsfp ORDER BY id) AS t WHERE morgan_fp('C1C(OC2=CC(=CC(=C2C1=O)))'::mol, 1) % f LIMIT 10; id | tanimoto_sml ---------+------------------- 3761688 | 0.441860465116279 (1 row) SELECT id, dice_sml(morgan_fp('C1C(OC2=CC(=CC(=C2C1=O)))'::mol, 1), f) FROM (SELECT * FROM pgsfp ORDER BY id) AS t WHERE morgan_fp('C1C(OC2=CC(=CC(=C2C1=O)))'::mol, 1) % f LIMIT 10; id | dice_sml ---------+------------------- 3761688 | 0.612903225806452 (1 row) SELECT id, tanimoto_sml(morgan_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol, 1), f) FROM (SELECT * FROM pgsfp ORDER BY id) AS t WHERE morgan_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol, 1) # f LIMIT 10; id | tanimoto_sml ----------+------------------- 902176 | 0.347826086956522 2952787 | 0.365853658536585 5281628 | 0.346153846153846 10560368 | 0.435897435897436 16196768 | 0.375 (5 rows) SELECT id, dice_sml(morgan_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol, 1), f) FROM (SELECT * FROM pgsfp ORDER BY id) AS t WHERE morgan_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol, 1) # f LIMIT 10; id | dice_sml ----------+------------------- 902176 | 0.516129032258065 2952787 | 0.535714285714286 5281628 | 0.514285714285714 10560368 | 0.607142857142857 16196768 | 0.545454545454545 (5 rows) select dice_sml(morgan_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol, 1), morgan_fp('C1C(OC2=CC(=CC(=C2C1=O)O)N)'::mol, 1)) sml; sml ------------------- 0.884615384615385 (1 row) select dice_sml(featmorgan_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol, 1), featmorgan_fp('C1C(OC2=CC(=CC(=C2C1=O)O)N)'::mol, 1)) sml; sml ------------------- 0.884615384615385 (1 row) select dice_sml(morganbv_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol, 1), morganbv_fp('C1C(OC2=CC(=CC(=C2C1=O)O)N)'::mol, 1)) sml; sml ------------------- 0.888888888888889 (1 row) select dice_sml(featmorganbv_fp('C1C(OC2=CC(=CC(=C2C1=O)O)O)'::mol, 1), featmorganbv_fp('C1C(OC2=CC(=CC(=C2C1=O)O)N)'::mol, 1)) sml; sml ------------------- 0.903225806451613 (1 row) select 'Cc1ccccc1'::mol@='c1ccccc1C'::mol; ?column? ---------- t (1 row) select 'Cc1ccccc1'::mol@='c1ccccc1CC'::mol; ?column? ---------- f (1 row) select 'Cc1ccccc1'::mol@='c1cccnc1C'::mol; ?column? ---------- f (1 row) select subtract(torsion_fp('CCC1CCNCC1'),torsion_fp('OCC1CCNCC1'))=subtract(torsion_fp('CCC1CCOCC1'),torsion_fp('OCC1CCOCC1')); ?column? ---------- t (1 row) select subtract(torsion_fp('CCC1CCNCC1'),torsion_fp('OCC1CCNCC1'))=subtract(torsion_fp('CCC1CCOCC1'),torsion_fp('NCC1CCOCC1')); ?column? ---------- f (1 row) select add(torsion_fp('CCC1CCNCC1'),torsion_fp('OCC1CCNCC1'))=add(torsion_fp('CCC1CCOCC1'),torsion_fp('OCC1CCOCC1')); ?column? ---------- f (1 row) select add(torsion_fp('CCC1CCNCC1'),torsion_fp('OCC1CCNCC1'))=add(torsion_fp('CCC1CCOCC1'),torsion_fp('NCC1CCOCC1')); ?column? ---------- f (1 row) select add(torsion_fp('CCC1CCNCC1'),torsion_fp('OCC1CCNCC1'))=subtract(torsion_fp('CCC1CCNCC1'),torsion_fp('OCC1CCNCC1')); ?column? ---------- f (1 row) select add(torsion_fp('CCC1CCNCC1'),torsion_fp('OCC1CCNCC1'))=subtract(torsion_fp('CCC1CCOCC1'),torsion_fp('OCC1CCOCC1')); ?column? ---------- f (1 row) select is_valid_ctab('chiral1.mol ChemDraw04200416412D 5 4 0 0 0 0 0 0 0 0999 V2000 -0.0141 0.0553 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.8109 0.0553 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 -0.4266 0.7697 0.0000 Br 0 0 0 0 0 0 0 0 0 0 0 0 -0.0141 -0.7697 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0 -0.8109 -0.1583 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 1 3 1 0 1 4 1 1 1 5 1 0 M END'); is_valid_ctab --------------- t (1 row) select is_valid_ctab('invalid'); is_valid_ctab --------------- f (1 row) select mol_from_ctab('chiral1.mol ChemDraw04200416412D 5 4 0 0 0 0 0 0 0 0999 V2000 -0.0141 0.0553 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.8109 0.0553 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 -0.4266 0.7697 0.0000 Br 0 0 0 0 0 0 0 0 0 0 0 0 -0.0141 -0.7697 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0 -0.8109 -0.1583 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 1 3 1 0 1 4 1 1 1 5 1 0 M END'); mol_from_ctab ---------------- C[C@](F)(Cl)Br (1 row) -- GitHub issue 9 select 'C1CC2CC3C45C2C2C6C7C8C9C%10C(C1)C1C%11%10C%109C98C87C76C42C24C65C3C3C56C64C4%12C72C28C79C8%10C9%11C1C1C%109C98C87C42C24C7%12C%116C65C3C3C56C6%11C%117C74C4%12C82C29C8%10C1C1C98C42C24C89C1C1C98C84C4%10C%122C27C7%11C%116C65C3C3C56C6%11C%117C42C24C7%11C%116C65C3C3C56C6%11C%117C74C4%12C%102C28C89C1C1C98C42C24C89C1C1C98C84C4%10C%122C27C7%11C%116C65C3C3C56C6%11C%117C42C24C7%11C%116C65C3C3C56C6%11C%117C74C4%12C%102C28C89C1C1C98C42C24C89C1CC8C4C1C%122C27C4%11C76C65C3CC6C7C4C12'::mol; WARNING: makeMolText: problems converting molecule to SMILES/SMARTS mol ----- (1 row) -- chiral matching select 'C[C@H](F)Cl'::mol@>'CC(F)Cl'::mol as match; match ------- t (1 row) select 'C[C@H](F)Cl'::mol@>'C[C@H](F)Cl'::mol as match; match ------- t (1 row) select 'C[C@H](F)Cl'::mol@>'C[C@@H](F)Cl'::mol as match; match ------- t (1 row) set rdkit.do_chiral_sss=true; select 'C[C@H](F)Cl'::mol@>'CC(F)Cl'::mol as match; match ------- t (1 row) select 'C[C@H](F)Cl'::mol@>'C[C@H](F)Cl'::mol as match; match ------- t (1 row) select 'C[C@H](F)Cl'::mol@>'C[C@@H](F)Cl'::mol as match; match ------- f (1 row) set rdkit.do_chiral_sss=false; -- substructure counts select substruct_count('c1ccncc1'::mol,'c1ccncc1'::mol); substruct_count ----------------- 1 (1 row) select substruct_count('c1ccncc1'::mol,'c1ccncc1'::mol,false); substruct_count ----------------- 2 (1 row)