OpenMS
Loading...
Searching...
No Matches
FeatureLinkerBase.cpp
Go to the documentation of this file.
1// --------------------------------------------------------------------------
2// OpenMS -- Open-Source Mass Spectrometry
3// --------------------------------------------------------------------------
4// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5// ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6//
7// This software is released under a three-clause BSD license:
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution.
13// * Neither the name of any author or any participating institution
14// may be used to endorse or promote products derived from this software
15// without specific prior written permission.
16// For a full list of authors, refer to the file AUTHORS.
17// --------------------------------------------------------------------------
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29//
30// --------------------------------------------------------------------------
31// $Maintainer: Timo Sachsenberg $
32// $Authors: Marc Sturm, Clemens Groepl, Steffen Sass $
33// --------------------------------------------------------------------------
34
35#include <OpenMS/FORMAT/ConsensusXMLFile.h>
36#include <OpenMS/FORMAT/FeatureXMLFile.h>
37#include <OpenMS/FORMAT/FileHandler.h>
38#include <OpenMS/FORMAT/FileTypes.h>
39#include <OpenMS/ANALYSIS/MAPMATCHING/FeatureGroupingAlgorithm.h>
40#include <OpenMS/DATASTRUCTURES/ListUtils.h>
41#include <OpenMS/CONCEPT/ProgressLogger.h>
42#include <OpenMS/METADATA/ExperimentalDesign.h>
43#include <OpenMS/FORMAT/ExperimentalDesignFile.h>
44
45#include <OpenMS/KERNEL/ConversionHelper.h>
46
47#include <OpenMS/APPLICATIONS/TOPPBase.h>
48
49#include <iomanip> // setw
50
51using namespace OpenMS;
52using namespace std;
53
54//-------------------------------------------------------------
55//Doxygen docu
56//-------------------------------------------------------------
57
65// We do not want this class to show up in the docu:
67
68class TOPPFeatureLinkerBase :
69 public TOPPBase,
70 public ProgressLogger
71{
72
73public:
74 TOPPFeatureLinkerBase(String name, String description, bool official = true) :
75 TOPPBase(name, description, official)
76 {
77 }
78
79protected:
80 void registerOptionsAndFlags_() override // only for "unlabeled" algorithms!
81 {
82 registerInputFileList_("in", "<files>", ListUtils::create<String>(""), "input files separated by blanks", true);
83 setValidFormats_("in", ListUtils::create<String>("featureXML,consensusXML"));
84 registerOutputFile_("out", "<file>", "", "Output file", true);
85 setValidFormats_("out", ListUtils::create<String>("consensusXML"));
86 registerInputFile_("design", "<file>", "", "input file containing the experimental design", false);
87 setValidFormats_("design", ListUtils::create<String>("tsv"));
88 addEmptyLine_();
89 registerFlag_("keep_subelements", "For consensusXML input only: If set, the sub-features of the inputs are transferred to the output.");
90 }
91
92 ExitCodes common_main_(FeatureGroupingAlgorithm * algorithm,
93 bool labeled = false)
94 {
95 //-------------------------------------------------------------
96 // parameter handling
97 //-------------------------------------------------------------
98 StringList ins;
99 if (labeled)
100 {
101 ins.push_back(getStringOption_("in"));
102 }
103 else
104 {
105 ins = getStringList_("in");
106 }
107 String out = getStringOption_("out");
108
109 //-------------------------------------------------------------
110 // check for valid input
111 //-------------------------------------------------------------
112 // check if all input files have the correct type
113 FileTypes::Type file_type = FileHandler::getType(ins[0]);
114 for (Size i = 0; i < ins.size(); ++i)
115 {
116 if (FileHandler::getType(ins[i]) != file_type)
117 {
118 writeLogError_("Error: All input files must be of the same type!");
119 return ILLEGAL_PARAMETERS;
120 }
121 }
122
123 //-------------------------------------------------------------
124 // set up algorithm
125 //-------------------------------------------------------------
126 Param algorithm_param = getParam_().copy("algorithm:", true);
127 writeDebug_("Used algorithm parameters", algorithm_param, 3);
128 algorithm->setParameters(algorithm_param);
129
130 //-------------------------------------------------------------
131 // perform grouping
132 //-------------------------------------------------------------
133 // load input
134 ConsensusMap out_map;
135 StringList ms_run_locations;
136
137 String design_file;
138
139 // TODO: support design in labeled feature linker
140 if (!labeled)
141 {
142 design_file = getStringOption_("design");
143 }
144
145 if (file_type == FileTypes::CONSENSUSXML && !design_file.empty())
146 {
147 writeLogError_("Error: Using fractionated design with consensusXML als input is not supported!");
148 return ILLEGAL_PARAMETERS;
149 }
150
151 if (file_type == FileTypes::FEATUREXML)
152 {
153 OPENMS_LOG_INFO << "Linking " << ins.size() << " featureXMLs." << endl;
154
155 //-------------------------------------------------------------
156 // Extract (optional) fraction identifiers and associate with featureXMLs
157 //-------------------------------------------------------------
158
159 // determine map of fractions to MS files
160 map<unsigned, vector<String>> frac2files;
161
162 if (!design_file.empty())
163 {
164 // parse design file and determine fractions
165 ExperimentalDesign ed = ExperimentalDesignFile::load(design_file, false);
166
167 // determine if design defines more than one fraction
168 frac2files = ed.getFractionToMSFilesMapping();
169
170 writeDebug_(String("Grouping ") + String(ed.getNumberOfFractions()) + " fractions.", 3);
171
172 // check if all fractions have the same number of MS runs associated
173 if (!ed.sameNrOfMSFilesPerFraction())
174 {
175 writeLogError_("Error: Number of runs must match for every fraction!");
176 return ILLEGAL_PARAMETERS;
177 }
178 }
179 else // no design file given
180 {
181 for (Size i = 0; i != ins.size(); ++i)
182 {
183 frac2files[1].emplace_back(String("file") + String(i)); // associate each run with fraction 1
184 }
185 }
186
187 vector<FeatureMap > maps(ins.size());
188 FeatureXMLFile f;
189 FeatureFileOptions param = f.getOptions();
190
191 // to save memory don't load convex hulls and subordinates
192 param.setLoadSubordinates(false);
193 param.setLoadConvexHull(false);
194 f.setOptions(param);
195
196 Size progress = 0;
197 setLogType(ProgressLogger::CMD);
198 startProgress(0, ins.size(), "reading input");
199 for (Size i = 0; i < ins.size(); ++i)
200 {
201 FeatureMap tmp;
202 f.load(ins[i], tmp);
203
204 StringList ms_runs;
205 tmp.getPrimaryMSRunPath(ms_runs);
206
207 // associate mzML file with map i in consensusXML
208 if (ms_runs.size() > 1 || ms_runs.empty())
209 {
210 OPENMS_LOG_WARN << "Exactly one MS run should be associated with a FeatureMap. "
211 << ms_runs.size()
212 << " provided." << endl;
213 }
214 else
215 {
216 out_map.getColumnHeaders()[i].filename = ms_runs.front();
217 }
218 out_map.getColumnHeaders()[i].size = tmp.size();
219 out_map.getColumnHeaders()[i].unique_id = tmp.getUniqueId();
220
221 // copy over information on the primary MS run
222 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
223
224 // to save memory, remove convex hulls, subordinates:
225 for (Feature& ft : tmp)
226 {
227 String adduct;
228 String group;
229 //exception: addduct information
230 if (ft.metaValueExists(Constants::UserParam::DC_CHARGE_ADDUCTS))
231 {
232 adduct = ft.getMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS);
233 }
234 if (ft.metaValueExists(Constants::UserParam::ADDUCT_GROUP))
235 {
236 group = ft.getMetaValue(Constants::UserParam::ADDUCT_GROUP);
237 }
238 ft.getSubordinates().clear();
239 ft.getConvexHulls().clear();
240 ft.clearMetaInfo();
241 if (!adduct.empty())
242 {
243 ft.setMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS, adduct);
244 }
245 if (!group.empty())
246 {
247 ft.setMetaValue("Group", group);
248 }
249
250 }
251
252 maps[i] = tmp;
253 maps[i].updateRanges();
254
255 setProgress(progress++);
256 }
257 endProgress();
258
259 // exception for "labeled" algorithms: copy file descriptions
260 if (labeled)
261 {
262 out_map.getColumnHeaders()[1] = out_map.getColumnHeaders()[0];
263 out_map.getColumnHeaders()[0].label = "light";
264 out_map.getColumnHeaders()[1].label = "heavy";
265 ms_run_locations.push_back(ms_run_locations[0]);
266 }
267
269 // invoke feature grouping algorithm
270
271 if (frac2files.size() == 1) // group one fraction
272 {
273 algorithm->group(maps, out_map);
274 }
275 else // group multiple fractions
276 {
277 writeDebug_(String("Stored in ") + String(maps.size()) + " maps.", 3);
278 for (Size i = 1; i <= frac2files.size(); ++i)
279 {
280 vector<FeatureMap> fraction_maps;
281 // TODO FRACTIONS: here we assume that the order of featureXML is from fraction 1..n
282 // we should check if these are shuffled and error / warn
283 for (size_t feature_map_index = 0; feature_map_index != frac2files[i].size(); ++feature_map_index)
284 {
285 fraction_maps.push_back(maps[feature_map_index]);
286 }
287 algorithm->group(fraction_maps, out_map);
288 }
289 }
290 }
291 else
292 {
293 //TODO isn't it better to have this option/functionality in the FeatureGroupingAlgorithm class?
294 // Otherwise everyone has to remember e.g. to annotate the old map_index etc.
295 bool keep_subelements = getFlag_("keep_subelements");
296 vector<ConsensusMap> maps(ins.size());
297 ConsensusXMLFile f;
298 for (Size i = 0; i < ins.size(); ++i)
299 {
300 f.load(ins[i], maps[i]);
301 maps[i].updateRanges();
302 // copy over information on the primary MS run
303 StringList ms_runs;
304 maps[i].getPrimaryMSRunPath(ms_runs);
305 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
306 if (keep_subelements)
307 {
308 auto saveOldMapIndex =
309 [](PeptideIdentification &p)
310 {
311 if (p.metaValueExists("map_index"))
312 {
313 p.setMetaValue("old_map_index", p.getMetaValue("map_index"));
314 }
315 else
316 {
317 OPENMS_LOG_WARN << "Warning: map_index not found in PeptideID. The tool will not be able to assign a"
318 "consistent one. Check the settings of previous tools." << std::endl;
319 }
320 };
321 maps[i].applyFunctionOnPeptideIDs(saveOldMapIndex, true);
322 }
323 }
324 // group
325 algorithm->group(maps, out_map);
326
327 // set file descriptions:
328
329 if (!keep_subelements)
330 {
331 for (Size i = 0; i < ins.size(); ++i)
332 {
333 out_map.getColumnHeaders()[i].filename = ins[i];
334 out_map.getColumnHeaders()[i].size = maps[i].size();
335 out_map.getColumnHeaders()[i].unique_id = maps[i].getUniqueId();
336 }
337 }
338 else
339 {
340 // components of the output map are not the input maps themselves, but
341 // the components of the input maps:
342 algorithm->transferSubelements(maps, out_map);
343 }
344 }
345
346 // assign unique ids
347 out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);
348
349 // annotate output with data processing info
350 addDataProcessing_(out_map,
351 getProcessingInfo_(DataProcessing::FEATURE_GROUPING));
352
353
354 // sort list of peptide identifications in each consensus feature by map index
355 out_map.sortPeptideIdentificationsByMapIndex();
356
357 // write output
358 ConsensusXMLFile().store(out, out_map);
359
360 // some statistics
361 map<Size, UInt> num_consfeat_of_size;
362 for (const ConsensusFeature& cf : out_map)
363 {
364 ++num_consfeat_of_size[cf.size()];
365 }
366
367 OPENMS_LOG_INFO << "Number of consensus features:" << endl;
368 for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin();
369 i != num_consfeat_of_size.rend(); ++i)
370 {
371 OPENMS_LOG_INFO << " of size " << setw(2) << i->first << ": " << setw(6)
372 << i->second << endl;
373 }
374 OPENMS_LOG_INFO << " total: " << setw(6) << out_map.size() << endl;
375
376 return EXECUTION_OK;
377 }
378
379};
380
Definition FLASHDeconvWizardBase.cpp:52