RDKit
Open-source cheminformatics and machine learning.
MolWriters.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #ifndef _RD_MOLWRITERS_H_
12 #define _RD_MOLWRITERS_H_
13 
14 #include <RDGeneral/types.h>
15 
16 #include <string>
17 #include <iostream>
18 #include <GraphMol/ROMol.h>
19 
20 namespace RDKit {
21 
//! Value used as the default \c confId argument of the write() methods
//! declared in this header.
// Declared static in a header: every translation unit that includes this
// file gets its own copy of the variable.
static int defaultConfId(-1);
23  class MolWriter {
24  public:
25  virtual ~MolWriter() {}
26  virtual void write(const ROMol &mol,int confId=defaultConfId) = 0;
27  virtual void flush() = 0;
28  virtual void close() = 0;
29  virtual void setProps(const STR_VECT &propNames)=0;
30  virtual unsigned int numMols() const =0;
31  };
32 
33  //! The SmilesWriter is for writing molecules and properties to
34  //! delimited text files.
35  class SmilesWriter : public MolWriter {
36  /******************************************************************************
37  * A Smiles Table writer - this is how it is used
38  * - create a SmilesWriter with a output file name (or a ostream), a delimiter,
39  * and a list of properties that need to be written out
40  * - then a call is made to the write function for each molecule that needs to
41  * be written out
42  ******************************************************************************/
43  public:
44  /*!
45  \param fileName : filename to write to ("-" to write to stdout)
46  \param delimiter : delimiter to use in the text file
47  \param nameHeader : used to label the name column in the output. If this
48  is provided as the empty string, no names will be written.
49  \param includeHeader : toggles inclusion of a header line in the output
50  \param isomericSmiles : toggles generation of isomeric SMILES
51  \param kekuleSmiles : toggles the generation of kekule SMILES
52 
53  */
54  SmilesWriter(std::string fileName,
55  std::string delimiter=" ",
56  std::string nameHeader="Name",
57  bool includeHeader=true,
58  bool isomericSmiles=false,
59  bool kekuleSmiles=false);
60  //! \overload
61  SmilesWriter(std::ostream *outStream,
62  std::string delimiter=" ",
63  std::string nameHeader="Name",
64  bool includeHeader=true,
65  bool takeOwnership=false,
66  bool isomericSmiles=false,
67  bool kekuleSmiles=false);
68 
69  ~SmilesWriter();
70 
71  //! \brief set a vector of property names that are need to be
72  //! written out for each molecule
73  void setProps(const STR_VECT &propNames);
74 
75  //! \brief write a new molecule to the file
76  void write(const ROMol &mol,int confId=defaultConfId);
77 
78  //! \brief flush the ostream
79  void flush() {
80  PRECONDITION(dp_ostream,"no output stream");
81  dp_ostream->flush();
82  };
83 
84  //! \brief close our stream (the writer cannot be used again)
85  void close() {
86  PRECONDITION(dp_ostream,"no output stream");
87  dp_ostream->flush();
88  if(df_owner) {
89  delete dp_ostream;
90  df_owner=false;
91  }
92  dp_ostream=NULL;
93  };
94 
95  //! \brief get the number of molecules written so far
96  unsigned int numMols() const { return d_molid;} ;
97 
98  private:
99  // local initialization
100  void init(std::string delimiter,std::string nameHeader,
101  bool includeHeader,
102  bool isomericSmiles,
103  bool kekuleSmiles);
104 
105 
106  // dumps a header line to the output stream
107  void dumpHeader() const;
108 
109 
110  std::ostream *dp_ostream;
111  bool df_owner;
112  bool df_includeHeader; // whether or not to include a title line
113  unsigned int d_molid; // the number of the molecules we wrote so far
114  std::string d_delim; // delimiter string between various records
115  std::string d_nameHeader; // header for the name column in the output file
116  STR_VECT d_props; // list of property name that need to be written out
117  bool df_isomericSmiles; // whether or not to do isomeric smiles
118  bool df_kekuleSmiles; // whether or not to do kekule smiles
119  };
120 
121 
122  //! The SDWriter is for writing molecules and properties to
123  //! SD files
124  class SDWriter : public MolWriter {
125  /**************************************************************************************
126  * A SD file ( or stream) writer - this is how it is used
127  * - create a SDMolWriter with a output file name (or a ostream),
128  * and a list of properties that need to be written out
129  * - then a call is made to the write function for each molecule that needs to be written out
130  **********************************************************************************************/
131  public:
132  /*!
133  \param fileName : filename to write to ("-" to write to stdout)
134  */
135  SDWriter(std::string fileName);
136  SDWriter(std::ostream *outStream,bool takeOwnership=false);
137 
138  ~SDWriter();
139 
140  //! \brief set a vector of property names that are need to be
141  //! written out for each molecule
142  void setProps(const STR_VECT &propNames);
143 
144  //! \brief write a new molecule to the file
145  void write(const ROMol &mol, int confId=defaultConfId);
146 
147  //! \brief flush the ostream
148  void flush() {
149  PRECONDITION(dp_ostream,"no output stream");
150  dp_ostream->flush();
151  } ;
152 
153  //! \brief close our stream (the writer cannot be used again)
154  void close() {
155  PRECONDITION(dp_ostream,"no output stream");
156  dp_ostream->flush();
157  if(df_owner) {
158  delete dp_ostream;
159  df_owner=false;
160  }
161  dp_ostream=NULL;
162  };
163 
164  //! \brief get the number of molecules written so far
165  unsigned int numMols() const { return d_molid; };
166 
167  void setForceV3000(bool val) { df_forceV3000=val; };
168  bool getForceV3000() const { return df_forceV3000; };
169 
170  void setKekulize(bool val) { df_kekulize=val; };
171  bool getKekulize() const { return df_kekulize; };
172 
173  private:
174  void writeProperty(const ROMol &mol, std::string name);
175 
176  std::ostream *dp_ostream;
177  bool df_owner;
178  unsigned int d_molid; // the number of the molecules we wrote so far
179  STR_VECT d_props; // list of property name that need to be written out
180  bool df_forceV3000; // force writing the mol blocks as V3000
181  bool df_kekulize; // toggle kekulization of molecules on writing
182  };
183 
184  //! The TDTWriter is for writing molecules and properties to
185  //! TDT files
186  class TDTWriter : public MolWriter {
187  /**************************************************************************************
188  * A TDT file ( or stream) writer - this is how it is used
189  * - create a TDTWriter with a output file name (or a ostream),
190  * and a list of properties that need to be written out
191  * - then a call is made to the write function for each molecule that needs to be written out
192  **********************************************************************************************/
193  public:
194  /*!
195  \param fileName : filename to write to ("-" to write to stdout)
196  */
197  TDTWriter(std::string fileName);
198  TDTWriter(std::ostream *outStream,bool takeOwnership=false);
199 
200  ~TDTWriter();
201 
202  //! \brief set a vector of property names that are need to be
203  //! written out for each molecule
204  void setProps(const STR_VECT &propNames);
205 
206  //! \brief write a new molecule to the file
207  void write(const ROMol &mol, int confId=defaultConfId);
208 
209  //! \brief flush the ostream
210  void flush() {
211  PRECONDITION(dp_ostream,"no output stream");
212  dp_ostream->flush();
213  };
214 
215  //! \brief close our stream (the writer cannot be used again)
216  void close() {
217  PRECONDITION(dp_ostream,"no output stream");
218  dp_ostream->flush();
219  if(df_owner) {
220  delete dp_ostream;
221  df_owner=false;
222  }
223  dp_ostream=NULL;
224  };
225 
226  //! \brief get the number of molecules written so far
227  unsigned int numMols() const { return d_molid; };
228 
229  void setWrite2D(bool state=true) { df_write2D=state; };
230  bool getWrite2D() const { return df_write2D; };
231 
232  void setWriteNames(bool state=true) { df_writeNames=state; };
233  bool getWriteNames() const { return df_writeNames; };
234 
235  void setNumDigits(unsigned int numDigits) { d_numDigits=numDigits; };
236  unsigned int getNumDigits() const { return d_numDigits;};
237 
238  private:
239  void writeProperty(const ROMol &mol, std::string name);
240 
241  std::ostream *dp_ostream;
242  bool df_owner;
243  unsigned int d_molid; // the number of molecules we wrote so far
244  STR_VECT d_props; // list of property name that need to be written out
245  bool df_write2D; // write 2D coordinates instead of 3D
246  bool df_writeNames; // write a name record for each molecule
247  unsigned int d_numDigits; // number of digits to use in our output of coordinates;
248  };
249 
250  //! The PDBWriter is for writing molecules to Brookhaven Protein
251  //! DataBank format files.
252  class PDBWriter : public MolWriter {
253  public:
254  PDBWriter(std::string fileName, unsigned int flavor = 0);
255  PDBWriter(std::ostream *outStream, bool takeOwnership=false,
256  unsigned int flavor = 0);
257  ~PDBWriter();
258 
259  //! \brief write a new molecule to the file
260  void write(const ROMol &mol, int confId=defaultConfId);
261 
262  void setProps(const STR_VECT&) {};
263 
264  //! \brief flush the ostream
265  void flush() {
266  PRECONDITION(dp_ostream,"no output stream");
267  dp_ostream->flush();
268  } ;
269 
270  //! \brief close our stream (the writer cannot be used again)
271  void close() {
272  PRECONDITION(dp_ostream,"no output stream");
273  dp_ostream->flush();
274  if(df_owner) {
275  delete dp_ostream;
276  df_owner=false;
277  }
278  dp_ostream=NULL;
279  };
280 
281  //! \brief get the number of molecules written so far
282  unsigned int numMols() const { return d_count;} ;
283 
284  private:
285  std::ostream *dp_ostream;
286  unsigned int d_flavor;
287  unsigned int d_count;
288  bool df_owner;
289  };
290 
291 
292 }
293 
294 #endif
295 
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:85
void setProps(const STR_VECT &propNames)
set the vector of property names that need to be written out for each molecule
void setProps(const STR_VECT &)
Definition: MolWriters.h:262
TDTWriter(std::string fileName)
bool getWriteNames() const
Definition: MolWriters.h:233
virtual ~MolWriter()
Definition: MolWriters.h:25
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:216
virtual unsigned int numMols() const =0
void flush()
flush the ostream
Definition: MolWriters.h:265
virtual void close()=0
void flush()
flush the ostream
Definition: MolWriters.h:79
void setProps(const STR_VECT &propNames)
set the vector of property names that need to be written out for each molecule
Defines the primary molecule class ROMol as well as associated typedefs.
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:154
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:271
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:96
bool getForceV3000() const
Definition: MolWriters.h:168
void flush()
flush the ostream
Definition: MolWriters.h:148
bool getWrite2D() const
Definition: MolWriters.h:230
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:227
void write(const ROMol &mol, int confId=defaultConfId)
write a new molecule to the file
virtual void flush()=0
static int defaultConfId
Definition: MolWriters.h:22
void write(const ROMol &mol, int confId=defaultConfId)
write a new molecule to the file
void setNumDigits(unsigned int numDigits)
Definition: MolWriters.h:235
void setWriteNames(bool state=true)
Definition: MolWriters.h:232
virtual void setProps(const STR_VECT &propNames)=0
SmilesWriter(std::string fileName, std::string delimiter=" ", std::string nameHeader="Name", bool includeHeader=true, bool isomericSmiles=false, bool kekuleSmiles=false)
void setForceV3000(bool val)
Definition: MolWriters.h:167
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
bool getKekulize() const
Definition: MolWriters.h:171
void write(const ROMol &mol, int confId=defaultConfId)
write a new molecule to the file
unsigned int getNumDigits() const
Definition: MolWriters.h:236
virtual void write(const ROMol &mol, int confId=defaultConfId)=0
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:165
#define PRECONDITION(expr, mess)
Definition: Invariant.h:119
SDWriter(std::string fileName)
void setWrite2D(bool state=true)
Definition: MolWriters.h:229
void write(const ROMol &mol, int confId=defaultConfId)
write a new molecule to the file
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:282
void setProps(const STR_VECT &propNames)
set the vector of property names that need to be written out for each molecule
void flush()
flush the ostream
Definition: MolWriters.h:210
PDBWriter(std::string fileName, unsigned int flavor=0)
std::vector< std::string > STR_VECT
Definition: Dict.h:26
void setKekulize(bool val)
Definition: MolWriters.h:170