Skip to content

Commit 12ec7ef

Browse files
done with submodule 2!
1 parent a6fbcde commit 12ec7ef

File tree

4 files changed

+322
-73
lines changed

4 files changed

+322
-73
lines changed

AWS/01-RNA-Seq/RNA-seq.ipynb

+107-23
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@
2020
"cell_type": "code",
2121
"execution_count": null,
2222
"id": "f0e9ac26-e308-471a-83a4-113f767524b0",
23-
"metadata": {},
23+
"metadata": {
24+
"vscode": {
25+
"languageId": "r"
26+
}
27+
},
2428
"outputs": [],
2529
"source": [
2630
"IRdisplay::display_html('<iframe src = \"../../docs/quiz_files/rna-pre_module.html\" width=95% height=600></iframe>')"
@@ -218,7 +222,11 @@
218222
"cell_type": "code",
219223
"execution_count": null,
220224
"id": "6da16610-a83f-42b5-be52-f8116db3fb03",
221-
"metadata": {},
225+
"metadata": {
226+
"vscode": {
227+
"languageId": "r"
228+
}
229+
},
222230
"outputs": [],
223231
"source": [
224232
"#Install nextflow, make it executable, and update it\n",
@@ -232,7 +240,7 @@
232240
"id": "3852204f-a42a-40a2-8b3f-4c153a9b0416",
233241
"metadata": {},
234242
"source": [
235-
"**The size of the output data generated by Nextflow is quite large we can mitigate that by storing the temporary and output files to a bucket by setting the 'workDir' and 'params.outdir' to an existing bucket. Make sure you modify the file called rnaseq-aws.config**\n",
243+
"The output data generated by Nextflow is quite large. We can mitigate that by storing the temporary and output files in a bucket: set 'workDir' and 'params.outdir' to paths in an existing bucket. Make sure you modify the file called rnaseq-aws.config.\n",
236244
" \n",
237245
"`workDir = 's3://your_bucket_name/rna-tmp'` \n",
238246
"`params.outdir = 's3://your_bucket_name/rna-outputs'`"
@@ -250,7 +258,11 @@
250258
"cell_type": "code",
251259
"execution_count": null,
252260
"id": "bb2e1f61-eea1-4464-9842-76fabae4c39a",
253-
"metadata": {},
261+
"metadata": {
262+
"vscode": {
263+
"languageId": "r"
264+
}
265+
},
254266
"outputs": [],
255267
"source": [
256268
"system('./nextflow run nf-core/rnaseq -c rnaseq-aws.config -profile test,aws', intern=TRUE)"
@@ -295,7 +307,11 @@
295307
"cell_type": "code",
296308
"execution_count": null,
297309
"id": "d888aacf-8f6d-427e-8646-f33a33203fd9",
298-
"metadata": {},
310+
"metadata": {
311+
"vscode": {
312+
"languageId": "r"
313+
}
314+
},
299315
"outputs": [],
300316
"source": [
301317
"system('chmod +x install_rna_seq_packages.sh' , intern=TRUE)\n",
@@ -307,14 +323,18 @@
307323
"id": "8ad88e67",
308324
"metadata": {},
309325
"source": [
310-
"**Important**: Choose \"R-RNA-Seq\" kernel for the rest of the notebook."
326+
"**Important: Choose \"R-RNA-Seq\" kernel for the rest of the notebook.**"
311327
]
312328
},
313329
{
314330
"cell_type": "code",
315331
"execution_count": null,
316332
"id": "a5ea70df-98ba-4a69-aeec-2baed34a72fc",
317-
"metadata": {},
333+
"metadata": {
334+
"vscode": {
335+
"languageId": "r"
336+
}
337+
},
318338
"outputs": [],
319339
"source": [
320340
"library(DESeq2)\n",
@@ -337,7 +357,11 @@
337357
"cell_type": "code",
338358
"execution_count": null,
339359
"id": "e49c7007-61b3-4057-89de-3f3fb1085800",
340-
"metadata": {},
360+
"metadata": {
361+
"vscode": {
362+
"languageId": "r"
363+
}
364+
},
341365
"outputs": [],
342366
"source": [
343367
"# download data files from storage bucket\n",
@@ -372,7 +396,11 @@
372396
"cell_type": "code",
373397
"execution_count": null,
374398
"id": "5fce0d5b-740b-4f9c-8479-fa60a0566532",
375-
"metadata": {},
399+
"metadata": {
400+
"vscode": {
401+
"languageId": "r"
402+
}
403+
},
376404
"outputs": [],
377405
"source": [
378406
"DESeq.ds <- DESeqDataSetFromMatrix(countData = round(readcounts), colData = sample_info, design = ~condition)\n"
@@ -392,7 +420,11 @@
392420
"cell_type": "code",
393421
"execution_count": null,
394422
"id": "cb7c8095-8d5c-4165-a0bf-9acfec7d97fe",
395-
"metadata": {},
423+
"metadata": {
424+
"vscode": {
425+
"languageId": "r"
426+
}
427+
},
396428
"outputs": [],
397429
"source": [
398430
"colData(DESeq.ds) %>% head\n",
@@ -417,7 +449,11 @@
417449
"cell_type": "code",
418450
"execution_count": null,
419451
"id": "4fc2ab39-8608-435b-a8f9-fb6f379be65a",
420-
"metadata": {},
452+
"metadata": {
453+
"vscode": {
454+
"languageId": "r"
455+
}
456+
},
421457
"outputs": [],
422458
"source": [
423459
"DESeq.ds <- DESeq.ds[ rowSums(counts(DESeq.ds)) > 0, ]\n",
@@ -441,7 +477,11 @@
441477
"cell_type": "code",
442478
"execution_count": null,
443479
"id": "40aa5366-4c12-4443-99a9-39ac707639d3",
444-
"metadata": {},
480+
"metadata": {
481+
"vscode": {
482+
"languageId": "r"
483+
}
484+
},
445485
"outputs": [],
446486
"source": [
447487
"# Get the size factor using estimateSizeFactors from DESeq.\n",
@@ -466,7 +506,11 @@
466506
"cell_type": "code",
467507
"execution_count": null,
468508
"id": "38523c0a-b9af-49ca-b7e8-2dbb4604fe5c",
469-
"metadata": {},
509+
"metadata": {
510+
"vscode": {
511+
"languageId": "r"
512+
}
513+
},
470514
"outputs": [],
471515
"source": [
472516
"# transform size-factor normalized read counts to log2 scale using pseudocount of 1\n",
@@ -495,7 +539,11 @@
495539
"cell_type": "code",
496540
"execution_count": null,
497541
"id": "a283f6d7-3370-41e7-a85c-c54c695f721f",
498-
"metadata": {},
542+
"metadata": {
543+
"vscode": {
544+
"languageId": "r"
545+
}
546+
},
499547
"outputs": [],
500548
"source": [
501549
"# mean-sd plot\n",
@@ -523,7 +571,11 @@
523571
"cell_type": "code",
524572
"execution_count": null,
525573
"id": "8f444ca6-b525-4e2e-9f9e-4363663b45fc",
526-
"metadata": {},
574+
"metadata": {
575+
"vscode": {
576+
"languageId": "r"
577+
}
578+
},
527579
"outputs": [],
528580
"source": [
529581
"# Regularized log-transformed values\n",
@@ -551,7 +603,11 @@
551603
"cell_type": "code",
552604
"execution_count": null,
553605
"id": "8ca292e8-27a2-4336-b047-07751cef499d",
554-
"metadata": {},
606+
"metadata": {
607+
"vscode": {
608+
"languageId": "r"
609+
}
610+
},
555611
"outputs": [],
556612
"source": [
557613
"# cor() calculates the correlation between columns of a matrix\n",
@@ -579,7 +635,11 @@
579635
"cell_type": "code",
580636
"execution_count": null,
581637
"id": "b38f5fab-4b48-4e74-9e23-793e202312dc",
582-
"metadata": {},
638+
"metadata": {
639+
"vscode": {
640+
"languageId": "r"
641+
}
642+
},
583643
"outputs": [],
584644
"source": [
585645
"P <- plotPCA(DESeq.rlog)\n",
@@ -602,7 +662,11 @@
602662
"cell_type": "code",
603663
"execution_count": null,
604664
"id": "f36513db-22f6-417f-aaa8-39ba1837ca35",
605-
"metadata": {},
665+
"metadata": {
666+
"vscode": {
667+
"languageId": "r"
668+
}
669+
},
606670
"outputs": [],
607671
"source": [
608672
"# DESeq2 uses the levels of the condition to determine the order of the comparison\n",
@@ -625,7 +689,11 @@
625689
"cell_type": "code",
626690
"execution_count": null,
627691
"id": "17827bd4-0938-49e8-9f3a-b4e979fedb3a",
628-
"metadata": {},
692+
"metadata": {
693+
"vscode": {
694+
"languageId": "r"
695+
}
696+
},
629697
"outputs": [],
630698
"source": [
631699
"#Check the results of deseq analysis\n",
@@ -646,7 +714,11 @@
646714
"cell_type": "code",
647715
"execution_count": null,
648716
"id": "39a986b0-3ee2-436a-927b-37f539e7c627",
649-
"metadata": {},
717+
"metadata": {
718+
"vscode": {
719+
"languageId": "r"
720+
}
721+
},
650722
"outputs": [],
651723
"source": [
652724
"#Histogram\n",
@@ -668,7 +740,11 @@
668740
"cell_type": "code",
669741
"execution_count": null,
670742
"id": "079404d0-d179-4bdf-8a2a-63e135b68c45",
671-
"metadata": {},
743+
"metadata": {
744+
"vscode": {
745+
"languageId": "r"
746+
}
747+
},
672748
"outputs": [],
673749
"source": [
674750
"#MA plot\n",
@@ -692,7 +768,11 @@
692768
"cell_type": "code",
693769
"execution_count": null,
694770
"id": "5bdda266-08e2-47fd-bdd8-f06cd4dd7f0c",
695-
"metadata": {},
771+
"metadata": {
772+
"vscode": {
773+
"languageId": "r"
774+
}
775+
},
696776
"outputs": [],
697777
"source": [
698778
"#HEATMAP\n",
@@ -736,7 +816,11 @@
736816
"cell_type": "code",
737817
"execution_count": null,
738818
"id": "38f62bcf-3150-417e-bebb-787763db04aa",
739-
"metadata": {},
819+
"metadata": {
820+
"vscode": {
821+
"languageId": "r"
822+
}
823+
},
740824
"outputs": [],
741825
"source": [
742826
"write.table(DGE.results.sorted, file=\"rna-seq_dge-results.txt\", sep = \"\\t\")"

AWS/01-RNA-Seq/rnaseq-aws.config

+4-4
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@ plugins {
55
profiles {
66
aws {
77
process {
8-
executor = 'awsbatch'
9-
queue = 'nextflow-batch-job-queue'
8+
executor = 'awsbatch' // Nextflow executor; 'awsbatch' submits jobs to AWS Batch
9+
queue = 'nextflow-batch-job-queue' // name of your AWS Batch job queue
1010
container = 'nf-core/rnaseq'
1111

1212
}
13-
workDir = 's3://your_bucket_name/rna-tmp/'
14-
params.outdir = 's3://your_bucket_name/rna-outputs/'
13+
workDir = 's3://your_bucket_name/rna-tmp/' // path of your working directory
14+
params.outdir = 's3://your_bucket_name/rna-outputs/' // path of your output directory
1515

1616
fusion.enabled = true
1717
wave.enabled = true

0 commit comments

Comments
 (0)