8.7 MS libraries

The loadMSLibrary() function is used to load MS spectral libraries, and was already briefly introduced for compound annotation. Currently, loading of MSP files and MoNA JSON files is supported; formula annotations for MS peaks can only be loaded from the latter. The underlying algorithms implement several optimizations to efficiently load large numbers of records. Furthermore, loadMSLibrary() automatically verifies record data such as formulas, adducts and masses, and automatically calculates missing or invalid data where possible.

mslibraryMSP <- loadMSLibrary("MoNA-export-CASMI_2016.msp", "msp")
mslibraryJSON <- loadMSLibrary("MoNA-export-CASMI_2016.json", "json")

Several advanced parameters are available that influence the loading of MS library data, see the reference manual (?loadMSLibrary) for details.

Once a library is loaded, the usual methods are available to inspect its data:

show(mslibraryMSP)
#> A MSLibrary object
#> Hierarchy:
#> workflowStep
#>     |-- MSLibrary
#> ---
#> Object size (indication): 101.6 kB
#> Algorithm: msp
#> Total records: 26
#> Total peaks: 318
#> Total annotated peaks: 0 (0.00%)
mslibraryMSP[[1]] # MS/MS spectrum for first record
#>           mz  intensity
#>        <num>      <num>
#>  1: 135.0441   1.001001
#>  2: 161.0594   0.500501
#>  3: 163.0379   0.600601
#>  4: 173.0590   0.200200
#>  5: 176.0699   0.200200
#> ---                    
#> 44: 353.1191   1.201201
#> 45: 354.1323 100.000000
#> 46: 355.1351  20.820821
#> 47: 356.1374   2.702703
#> 48: 357.1401   0.300300
mslibraryJSON[["SM801601"]] # a record with annotations
#>          mz  intensity annotation
#>       <num>      <num>     <char>
#> 1:  65.0388   0.100228       C5H5
#> 2:  91.0541   0.922448       C7H7
#> 3:  93.0573   5.489900      C6H7N
#> 4: 106.0651   0.101855      C7H8N
#> 5: 108.0807 100.000000     C7H10N
#> 6: 109.0648   2.004170      C7H9O
#> 7: 132.0807   0.926004     C9H10N
#> 8: 150.0913  76.554515    C9H12NO
 # overview of all metadata (select few columns for readability)
records(mslibraryJSON)[, .(DB_ID, Name, InChIKey, formula)]
#>         DB_ID                              Name                    InChIKey         formula
#>        <char>                            <char>                      <char>          <char>
#>   1: SM800003                    1,2,3-Triazole QWENRTYMTSOGBR-UHFFFAOYSA-N          C2H3N3
#>   2: SM800201                   1-Naphthylamine RUFPHBVGCFYCNW-UHFFFAOYSA-N          C10H9N
#>   3: SM800553             2,3-Dihydroxybiphenyl YKOQAAJBYBTSBS-UHFFFAOYSA-N        C12H10O2
#>   4: SM800653                  2,4-Dibromphenol FAXWFCTVSHEODL-UHFFFAOYSA-N        C6H4Br2O
#>   5: SM800802                 2-Aminoanthracene YCSBALJAGZKWFF-UHFFFAOYSA-N         C14H11N
#>  ---                                                                                       
#> 618: SM884401                  Anthranilic acid RWZYAGGXGHYGMB-UHFFFAOYSA-N         C7H7NO2
#> 619: SM884552                  Fipronil sulfide FQXWEKADCSXYOC-UHFFFAOYSA-N   C12H4Cl2F6N4S
#> 620: SM884652                  Fipronil sulfone LGHZJDKSVUTELU-UHFFFAOYSA-N C12H4Cl2F6N4O2S
#> 621: SM884701 N-Cyclohexyl-2-benzothiazol-amine UPWPIFMHSFSVLE-UHFFFAOYSA-N       C13H16N2S
#> 622: SM884952               Fipronil desulfinyl JWKXVHLIRTVXLD-UHFFFAOYSA-N    C12H4Cl2F6N4
# convert all data to a data.table (may be huge!)
as.data.table(mslibraryMSP)[, .(DB_ID, SMILES, formula, mz, intensity)]
#> Key: <DB_ID>
#>         DB_ID                                                         SMILES   formula       mz intensity
#>        <char>                                                         <char>    <char>    <num>     <num>
#>   1: SMI00001 CN1CC2=C(C=CC3=C2OCO3)[C@@H]4[C@H]1C5=CC6=C(C=C5C[C@@H]4O)OCO6 C20H19NO5 135.0441  1.001001
#>   2: SMI00001 CN1CC2=C(C=CC3=C2OCO3)[C@@H]4[C@H]1C5=CC6=C(C=C5C[C@@H]4O)OCO6 C20H19NO5 161.0594  0.500501
#>   3: SMI00001 CN1CC2=C(C=CC3=C2OCO3)[C@@H]4[C@H]1C5=CC6=C(C=C5C[C@@H]4O)OCO6 C20H19NO5 163.0379  0.600601
#>   4: SMI00001 CN1CC2=C(C=CC3=C2OCO3)[C@@H]4[C@H]1C5=CC6=C(C=C5C[C@@H]4O)OCO6 C20H19NO5 173.0590  0.200200
#>   5: SMI00001 CN1CC2=C(C=CC3=C2OCO3)[C@@H]4[C@H]1C5=CC6=C(C=C5C[C@@H]4O)OCO6 C20H19NO5 176.0699  0.200200
#>  ---                                                                                                     
#> 314: SMI00172                                  C1=CC=C(C=C1)NN=CC2=CC=CC=C2N  C13H13N3 120.0678 22.170483
#> 315: SMI00172                                  C1=CC=C(C=C1)NN=CC2=CC=CC=C2N  C13H13N3 121.0756  6.520678
#> 316: SMI00172                                  C1=CC=C(C=C1)NN=CC2=CC=CC=C2N  C13H13N3 167.0729 28.015663
#> 317: SMI00172                                  C1=CC=C(C=C1)NN=CC2=CC=CC=C2N  C13H13N3 168.0810 13.500651
#> 318: SMI00172                                  C1=CC=C(C=C1)NN=CC2=CC=CC=C2N  C13H13N3 195.0917  8.223496

Furthermore, like many other objects in patRoon, the MS library objects can be subset and filtered:

mslibrarySub <- mslibrary[1:100] # only keep first 100 records

# only keep records with a neutral mass of 100-200
mslibraryF <- filter(mslibrary, massRange = c(100, 200))
# remove records with neutral mass below 100
mslibraryF <- filter(mslibrary, massRange = c(0, 100), negate = TRUE)
# only keep mass peaks with m/z 100-500
mslibraryF <- filter(mslibrary, mzRangeSpec = c(100, 500))
# remove low intensity peaks (<1%) and only keep top 10
mslibraryF <- filter(mslibrary, relMinIntensity = 0.01, topMost = 10)
# only keep mass peaks with annotations
mslibraryF <- filter(mslibraryJSON, onlyAnnotated = TRUE)

In addition, the properties filter may be useful to tailor the library data. The library properties can be obtained as follows:

names(records(mslibrary)) # get all property names
#>  [1] "Name"             "Synon"            "DB_ID"            "InChIKey"        
#>  [5] "InChI"            "Precursor_type"   "Spectrum_type"    "PrecursorMZ"     
#>  [9] "Instrument_type"  "Instrument"       "Ion_mode"         "Collision_energy"
#> [13] "formula"          "MW"               "neutralMass"      "Comments"        
#> [17] "SMILES"           "SPLASH"           "CAS"              "PubChemCID"      
#> [21] "ChemSpiderID"     "Ionization"       "Resolution"
unique(records(mslibrary)[["Instrument_type"]]) # Get the available instrument types
#> [1] "LC-ESI-QTOF"  "LC-APCI-ITFT" "APCI-ITFT"

Then to filter the MS library:

# only keep APCI instrument types
mslibraryF <- filter(mslibrary, properties = list(Instrument_type = c("LC-APCI-ITFT", "APCI-ITFT")))
# remove Q-TOF by negation
mslibraryF <- filter(mslibrary, properties = list(Instrument_type = "LC-ESI-QTOF"), negate = TRUE)

More advanced filtering can be performed with the delete() generic function, see the reference manual for details (?MSLibrary).

Finally, functionality exists to convert, export and merge MS libraries:

# Convert the MS library to a suspect list.
# By setting collapse to TRUE, all records with the same first block InChIKey
# are collapsed and mass peaks are averaged.
suspL <- convertToSuspects(mslibrary, adduct = "[M+H]+", collapse = TRUE)
# Amend custom suspect list with library data (fragments_mz column)
suspL <- convertToSuspects(mslibrary, adduct = "[M+H]+", suspects = patRoonData::suspectsPos)

export(mslibrary, out = "myMSLib.msp") # export to a new MSP library

mslibraryM <- merge(mslibraryMSP, mslibraryJSON) # merge two libraries