flowchart TB
v0(Channel.fromList)
v2(filter)
v10(filter)
v18(align_query_reference)
v25(cross)
v35(cross)
v41(filter)
v52(filter)
v64(cross)
v74(cross)
v84(concat)
v91(cross)
v101(cross)
v107(flatMap)
v111(filter)
v119(concatenate_h5mu)
v126(cross)
v136(cross)
v143(filter)
v151(highly_variable_features_scanpy)
v158(cross)
v168(cross)
v174(filter)
v182(pca)
v189(cross)
v199(cross)
v205(filter)
v213(delete_aligned_lognormalized_counts_layer)
v220(cross)
v230(cross)
v236(filter)
v244(filter)
v259(cross)
v269(cross)
v275(filter)
v429(concat)
v284(filter)
v299(cross)
v309(cross)
v318(branch)
v345(concat)
v330(cross)
v340(cross)
v349(branch)
v376(concat)
v361(cross)
v371(cross)
v380(branch)
v407(concat)
v392(cross)
v402(cross)
v414(cross)
v424(cross)
v436(cross)
v446(cross)
v453(filter)
v464(filter)
v476(cross)
v486(cross)
v496(concat)
v503(cross)
v513(cross)
v522(filter)
v530(knn)
v537(cross)
v547(cross)
v555(mix)
v558(filter)
v588(concat)
v566(merge)
v573(cross)
v583(cross)
v595(cross)
v602(cross)
v614(cross)
v621(cross)
v625(Output)
subgraph group_split_modalities [split_modalities]
v57(split_modalities_component)
end
subgraph group_harmony_leiden_workflow [harmony_leiden_workflow]
v252(harmonypy)
v292(find_neighbors)
v323(leiden)
v354(move_obsm_to_obs)
v385(umap)
end
subgraph group_split_h5mu [split_h5mu]
v469(split_h5mu_component)
end
v318-->v345
v349-->v376
v380-->v407
v0-->v2
v2-->v10
v10-->v18
v18-->v25
v10-->v25
v10-->v35
v52-->v57
v57-->v64
v52-->v64
v52-->v74
v84-->v91
v41-->v91
v41-->v101
v111-->v119
v119-->v126
v111-->v126
v111-->v136
v143-->v151
v151-->v158
v143-->v158
v143-->v168
v174-->v182
v182-->v189
v174-->v189
v174-->v199
v205-->v213
v213-->v220
v205-->v220
v205-->v230
v236-->v244
v244-->v252
v252-->v259
v244-->v259
v244-->v269
v275-->v284
v284-->v292
v292-->v299
v284-->v299
v284-->v309
v318-->v323
v323-->v330
v318-->v330
v318-->v340
v340-->v345
v349-->v354
v354-->v361
v349-->v361
v349-->v371
v371-->v376
v380-->v385
v385-->v392
v380-->v392
v380-->v402
v402-->v407
v407-->v414
v275-->v414
v275-->v424
v424-->v429
v429-->v436
v236-->v436
v236-->v446
v464-->v469
v469-->v476
v464-->v476
v464-->v486
v496-->v503
v453-->v503
v453-->v513
v522-->v530
v530-->v537
v522-->v537
v522-->v547
v558-->v566
v566-->v573
v558-->v573
v558-->v583
v583-->v588
v588-->v595
v2-->v595
v595-->v602
v2-->v602
v2-->v614
v614-->v621
v2-->v621
v621-->v625
v35-->v41
v18-->v35
v101-->v107
v41-->v52
v74-->v84
v57-->v74
v84-->v101
v107-->v111
v136-->v143
v119-->v136
v168-->v174
v151-->v168
v199-->v205
v182-->v199
v230-->v236
v213-->v230
v446-->v453
v269-->v275
v252-->v269
v292-->v309
v309-->v318
v323-->v340
v345-->v349
v354-->v371
v376-->v380
v385-->v402
v407-->v424
v429-->v446
v513-->v522
v453-->v464
v486-->v496
v469-->v486
v496-->v513
v547-->v555
v530-->v547
v107-->v555
v555-->v558
v566-->v583
v588-->v614
style group_split_modalities fill:#F0F0F0,stroke:#969696;
style group_harmony_leiden_workflow fill:#F0F0F0,stroke:#969696;
style group_split_h5mu fill:#F0F0F0,stroke:#969696;
style v0 fill:#e3dcea,stroke:#7a4baa;
style v2 fill:#e3dcea,stroke:#7a4baa;
style v10 fill:#e3dcea,stroke:#7a4baa;
style v18 fill:#e3dcea,stroke:#7a4baa;
style v25 fill:#e3dcea,stroke:#7a4baa;
style v35 fill:#e3dcea,stroke:#7a4baa;
style v41 fill:#e3dcea,stroke:#7a4baa;
style v52 fill:#e3dcea,stroke:#7a4baa;
style v57 fill:#e3dcea,stroke:#7a4baa;
style v64 fill:#e3dcea,stroke:#7a4baa;
style v74 fill:#e3dcea,stroke:#7a4baa;
style v84 fill:#e3dcea,stroke:#7a4baa;
style v91 fill:#e3dcea,stroke:#7a4baa;
style v101 fill:#e3dcea,stroke:#7a4baa;
style v107 fill:#e3dcea,stroke:#7a4baa;
style v111 fill:#e3dcea,stroke:#7a4baa;
style v119 fill:#e3dcea,stroke:#7a4baa;
style v126 fill:#e3dcea,stroke:#7a4baa;
style v136 fill:#e3dcea,stroke:#7a4baa;
style v143 fill:#e3dcea,stroke:#7a4baa;
style v151 fill:#e3dcea,stroke:#7a4baa;
style v158 fill:#e3dcea,stroke:#7a4baa;
style v168 fill:#e3dcea,stroke:#7a4baa;
style v174 fill:#e3dcea,stroke:#7a4baa;
style v182 fill:#e3dcea,stroke:#7a4baa;
style v189 fill:#e3dcea,stroke:#7a4baa;
style v199 fill:#e3dcea,stroke:#7a4baa;
style v205 fill:#e3dcea,stroke:#7a4baa;
style v213 fill:#e3dcea,stroke:#7a4baa;
style v220 fill:#e3dcea,stroke:#7a4baa;
style v230 fill:#e3dcea,stroke:#7a4baa;
style v236 fill:#e3dcea,stroke:#7a4baa;
style v244 fill:#e3dcea,stroke:#7a4baa;
style v252 fill:#e3dcea,stroke:#7a4baa;
style v259 fill:#e3dcea,stroke:#7a4baa;
style v269 fill:#e3dcea,stroke:#7a4baa;
style v275 fill:#e3dcea,stroke:#7a4baa;
style v429 fill:#e3dcea,stroke:#7a4baa;
style v284 fill:#e3dcea,stroke:#7a4baa;
style v292 fill:#e3dcea,stroke:#7a4baa;
style v299 fill:#e3dcea,stroke:#7a4baa;
style v309 fill:#e3dcea,stroke:#7a4baa;
style v318 fill:#e3dcea,stroke:#7a4baa;
style v345 fill:#e3dcea,stroke:#7a4baa;
style v323 fill:#e3dcea,stroke:#7a4baa;
style v330 fill:#e3dcea,stroke:#7a4baa;
style v340 fill:#e3dcea,stroke:#7a4baa;
style v349 fill:#e3dcea,stroke:#7a4baa;
style v376 fill:#e3dcea,stroke:#7a4baa;
style v354 fill:#e3dcea,stroke:#7a4baa;
style v361 fill:#e3dcea,stroke:#7a4baa;
style v371 fill:#e3dcea,stroke:#7a4baa;
style v380 fill:#e3dcea,stroke:#7a4baa;
style v407 fill:#e3dcea,stroke:#7a4baa;
style v385 fill:#e3dcea,stroke:#7a4baa;
style v392 fill:#e3dcea,stroke:#7a4baa;
style v402 fill:#e3dcea,stroke:#7a4baa;
style v414 fill:#e3dcea,stroke:#7a4baa;
style v424 fill:#e3dcea,stroke:#7a4baa;
style v436 fill:#e3dcea,stroke:#7a4baa;
style v446 fill:#e3dcea,stroke:#7a4baa;
style v453 fill:#e3dcea,stroke:#7a4baa;
style v464 fill:#e3dcea,stroke:#7a4baa;
style v469 fill:#e3dcea,stroke:#7a4baa;
style v476 fill:#e3dcea,stroke:#7a4baa;
style v486 fill:#e3dcea,stroke:#7a4baa;
style v496 fill:#e3dcea,stroke:#7a4baa;
style v503 fill:#e3dcea,stroke:#7a4baa;
style v513 fill:#e3dcea,stroke:#7a4baa;
style v522 fill:#e3dcea,stroke:#7a4baa;
style v530 fill:#e3dcea,stroke:#7a4baa;
style v537 fill:#e3dcea,stroke:#7a4baa;
style v547 fill:#e3dcea,stroke:#7a4baa;
style v555 fill:#e3dcea,stroke:#7a4baa;
style v558 fill:#e3dcea,stroke:#7a4baa;
style v588 fill:#e3dcea,stroke:#7a4baa;
style v566 fill:#e3dcea,stroke:#7a4baa;
style v573 fill:#e3dcea,stroke:#7a4baa;
style v583 fill:#e3dcea,stroke:#7a4baa;
style v595 fill:#e3dcea,stroke:#7a4baa;
style v602 fill:#e3dcea,stroke:#7a4baa;
style v614 fill:#e3dcea,stroke:#7a4baa;
style v621 fill:#e3dcea,stroke:#7a4baa;
style v625 fill:#e3dcea,stroke:#7a4baa;
Harmony integration followed by KNN label transfer
Cell type annotation workflow that performs Harmony integration of the reference and query datasets, followed by KNN label transfer.
Info
ID: harmony_knn
Namespace: workflows/annotation
Links
Example commands
You can run the pipeline using nextflow run.
View help
You can use --help as a parameter to get an overview of the possible parameters.
nextflow run openpipelines-bio/openpipeline \
-r 2.1.1 -latest \
-main-script target/nextflow/workflows/annotation/harmony_knn/main.nf \
--help
Run command
Example of params.yaml
# Query Input
id: # please fill in - example: "foo"
input: # please fill in - example: "input.h5mu"
modality: "rna"
# input_layer: "foo"
input_obs_batch_label: # please fill in - example: "sample"
# input_var_gene_names: "foo"
input_reference_gene_overlap: 100
overwrite_existing_key: false
# Reference input
reference: # please fill in - example: "reference.h5mu"
# reference_layer: "foo"
reference_obs_target: # please fill in - example: "cell_type"
# reference_var_gene_names: "foo"
reference_obs_batch_label: # please fill in - example: "sample"
# HVG subset arguments
n_hvg: 2000
# PCA options
# pca_num_components: 25
# Harmony integration options
harmony_theta: [2.0]
# Leiden clustering options
leiden_resolution: [1.0]
# Neighbor classifier arguments
knn_weights: "uniform"
knn_n_neighbors: 15
# Outputs
# output: "$id.$key.output.h5mu"
# output_obs_predictions: ["foo"]
# output_obs_probability: ["foo"]
output_obsm_integrated: "X_integrated_harmony"
# output_compression: "gzip"
# Nextflow input-output arguments
publish_dir: # please fill in - example: "output/"
# param_list: "my_params.yaml"
# Arguments
nextflow run openpipelines-bio/openpipeline \
-r 2.1.1 -latest \
-profile docker \
-main-script target/nextflow/workflows/annotation/harmony_knn/main.nf \
-params-file params.yaml
Note
Replace -profile docker with -profile podman or -profile singularity depending on the desired backend.
Argument groups
Query Input
| Name | Description | Attributes |
|---|---|---|
--id |
ID of the sample. | string, required, example: "foo" |
--input |
Input dataset consisting of the (unlabeled) query observations. The dataset is expected to be pre-processed in the same way as the --reference dataset. | file, required, example: "input.h5mu" |
--modality |
Which modality to process. Should match the modality of the --reference dataset. | string, default: "rna" |
--input_layer |
The layer of the input dataset to process if .X is not to be used. Should contain log normalized counts. | string |
--input_obs_batch_label |
The .obs field in the input (query) dataset containing the batch labels. | string, required, example: "sample" |
--input_var_gene_names |
The .var field in the input (query) dataset containing gene names; if not provided, the .var index will be used. | string |
--input_reference_gene_overlap |
The minimum number of genes present in both the reference and query datasets. | integer, default: 100 |
--overwrite_existing_key |
If provided, will overwrite existing fields in the input dataset when data are copied during the reference alignment process. | boolean_true |
Reference input
| Name | Description | Attributes |
|---|---|---|
--reference |
Reference dataset consisting of the labeled observations to train the KNN classifier on. The dataset is expected to be pre-processed in the same way as the --input query dataset. | file, required, example: "reference.h5mu" |
--reference_layer |
The layer of the reference dataset to process if .X is not to be used. Should contain log normalized counts. | string |
--reference_obs_target |
The .obs key of the target cell type labels to transfer. | string, required, example: "cell_type" |
--reference_var_gene_names |
The .var field in the reference dataset containing gene names; if not provided, the .var index will be used. | string |
--reference_obs_batch_label |
The .obs field in the reference dataset containing the batch labels. | string, required, example: "sample" |
HVG subset arguments
| Name | Description | Attributes |
|---|---|---|
--n_hvg |
Number of highly variable genes to subset for. | integer, default: 2000 |
PCA options
| Name | Description | Attributes |
|---|---|---|
--pca_num_components |
Number of principal components to compute. Defaults to 50, or to the minimum dimension size of the selected representation minus 1 if that is smaller. | integer, example: 25 |
Harmony integration options
| Name | Description | Attributes |
|---|---|---|
--harmony_theta |
Diversity clustering penalty parameter. Can be set as a single value for all batch observations or as multiple values, one for each observation in the batches defined by --input_obs_batch_label. theta=0 does not encourage any diversity. Larger values of theta result in more diverse clusters. | List of double, default: 2, multiple_sep: ";" |
Leiden clustering options
| Name | Description | Attributes |
|---|---|---|
--leiden_resolution |
Control the coarseness of the clustering. Higher values lead to more clusters. | List of double, default: 1, multiple_sep: ";" |
Neighbor classifier arguments
| Name | Description | Attributes |
|---|---|---|
--knn_weights |
Weight function used in prediction. Possible values are: uniform (all points in each neighborhood are weighted equally) or distance (points are weighted by the inverse of their distance). | string, default: "uniform" |
--knn_n_neighbors |
The number of neighbors to use in the k-neighbor graph structure used for fast approximate nearest neighbor search with PyNNDescent. Larger values will result in more accurate search results at the cost of computation time. | integer, default: 15 |
Outputs
| Name | Description | Attributes |
|---|---|---|
--output |
The query data in .h5mu format, with labels predicted by the classifier trained on the reference. | file, required, example: "output.h5mu" |
--output_obs_predictions |
In which .obs slots to store the predicted cell labels. If provided, must have the same length as --reference_obs_target. If empty, will default to the --reference_obs_target value combined with the "_pred" suffix. | List of string, multiple_sep: ";" |
--output_obs_probability |
In which .obs slots to store the probability of the predictions. If provided, must have the same length as --reference_obs_target. If empty, will default to the --reference_obs_target value combined with the "_probability" suffix. | List of string, multiple_sep: ";" |
--output_obsm_integrated |
In which .obsm slot to store the integrated embedding. | string, default: "X_integrated_harmony" |
--output_compression |
The compression format to be used on the output h5mu object. | string, example: "gzip" |