Skip to content

Commit 1b81e21

Browse files
modified submodule1 and 2 to add more clarification to nextflow.config file
1 parent 45f69e8 commit 1b81e21

File tree

2 files changed

+304
-66
lines changed

2 files changed

+304
-66
lines changed

AWS/01-RNA-Seq/RNA-seq.ipynb

+111-24
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@
2020
"cell_type": "code",
2121
"execution_count": null,
2222
"id": "1c2d28e0",
23-
"metadata": {},
23+
"metadata": {
24+
"vscode": {
25+
"languageId": "r"
26+
}
27+
},
2428
"outputs": [],
2529
"source": [
2630
"IRdisplay::display_html('<iframe src = \"../../quiz_files/rna-pre_module.html\" width=95% height=600></iframe>')"
@@ -201,7 +205,11 @@
201205
"cell_type": "code",
202206
"execution_count": null,
203207
"id": "95cda0d2",
204-
"metadata": {},
208+
"metadata": {
209+
"vscode": {
210+
"languageId": "r"
211+
}
212+
},
205213
"outputs": [],
206214
"source": [
207215
"system('chmod +x install_rna_seq_packages.sh' , intern=TRUE)\n",
@@ -245,7 +253,11 @@
245253
"cell_type": "code",
246254
"execution_count": null,
247255
"id": "6da16610-a83f-42b5-be52-f8116db3fb03",
248-
"metadata": {},
256+
"metadata": {
257+
"vscode": {
258+
"languageId": "r"
259+
}
260+
},
249261
"outputs": [],
250262
"source": [
251263
"# Install Nextflow, make it executable, and update it\n",
@@ -260,9 +272,12 @@
260272
"metadata": {},
261273
"source": [
262274
"The output data generated by Nextflow is quite large. We can mitigate that by storing the temporary and output files in an S3 bucket: set 'workDir' and 'params.outdir' to locations in an existing bucket. Make sure you modify the file called rnaseq-aws.config.\n",
263-
" \n",
264-
"`workDir = 's3://your_bucket_name/rna-tmp'` \n",
265-
"`params.outdir = 's3://your_bucket_name/rna-outputs'`"
275+
"```\n",
276+
"aws.region = 'us-east-1' // YOUR AWS REGION\n",
277+
"queue = 'nextflow-batch-job-queue' // Name of your Job queue \n",
278+
"workDir = 's3://your_bucket_name/rna-tmp' \n",
279+
"params.outdir = 's3://your_bucket_name/rna-outputs'\n",
280+
"```"
266281
]
267282
},
268283
{
@@ -277,7 +292,11 @@
277292
"cell_type": "code",
278293
"execution_count": null,
279294
"id": "bb2e1f61-eea1-4464-9842-76fabae4c39a",
280-
"metadata": {},
295+
"metadata": {
296+
"vscode": {
297+
"languageId": "r"
298+
}
299+
},
281300
"outputs": [],
282301
"source": [
283302
"system('./nextflow run nf-core/rnaseq -c rnaseq-aws.config -profile test,aws', intern=TRUE)"
@@ -314,7 +333,11 @@
314333
"cell_type": "code",
315334
"execution_count": null,
316335
"id": "a5ea70df-98ba-4a69-aeec-2baed34a72fc",
317-
"metadata": {},
336+
"metadata": {
337+
"vscode": {
338+
"languageId": "r"
339+
}
340+
},
318341
"outputs": [],
319342
"source": [
320343
"library(DESeq2)\n",
@@ -337,7 +360,11 @@
337360
"cell_type": "code",
338361
"execution_count": null,
339362
"id": "e49c7007-61b3-4057-89de-3f3fb1085800",
340-
"metadata": {},
363+
"metadata": {
364+
"vscode": {
365+
"languageId": "r"
366+
}
367+
},
341368
"outputs": [],
342369
"source": [
343370
"# download data files from storage bucket\n",
@@ -372,7 +399,11 @@
372399
"cell_type": "code",
373400
"execution_count": null,
374401
"id": "5fce0d5b-740b-4f9c-8479-fa60a0566532",
375-
"metadata": {},
402+
"metadata": {
403+
"vscode": {
404+
"languageId": "r"
405+
}
406+
},
376407
"outputs": [],
377408
"source": [
378409
"DESeq.ds <- DESeqDataSetFromMatrix(countData = round(readcounts), colData = sample_info, design = ~condition)\n"
@@ -392,7 +423,11 @@
392423
"cell_type": "code",
393424
"execution_count": null,
394425
"id": "cb7c8095-8d5c-4165-a0bf-9acfec7d97fe",
395-
"metadata": {},
426+
"metadata": {
427+
"vscode": {
428+
"languageId": "r"
429+
}
430+
},
396431
"outputs": [],
397432
"source": [
398433
"colData(DESeq.ds) %>% head\n",
@@ -417,7 +452,11 @@
417452
"cell_type": "code",
418453
"execution_count": null,
419454
"id": "4fc2ab39-8608-435b-a8f9-fb6f379be65a",
420-
"metadata": {},
455+
"metadata": {
456+
"vscode": {
457+
"languageId": "r"
458+
}
459+
},
421460
"outputs": [],
422461
"source": [
423462
"DESeq.ds <- DESeq.ds[ rowSums(counts(DESeq.ds)) > 0, ]\n",
@@ -441,7 +480,11 @@
441480
"cell_type": "code",
442481
"execution_count": null,
443482
"id": "40aa5366-4c12-4443-99a9-39ac707639d3",
444-
"metadata": {},
483+
"metadata": {
484+
"vscode": {
485+
"languageId": "r"
486+
}
487+
},
445488
"outputs": [],
446489
"source": [
447490
"# Get the size factor using estimateSizeFactors from DESeq.\n",
@@ -466,7 +509,11 @@
466509
"cell_type": "code",
467510
"execution_count": null,
468511
"id": "38523c0a-b9af-49ca-b7e8-2dbb4604fe5c",
469-
"metadata": {},
512+
"metadata": {
513+
"vscode": {
514+
"languageId": "r"
515+
}
516+
},
470517
"outputs": [],
471518
"source": [
472519
"# transform size-factor normalized read counts to log2 scale using pseudocount of 1\n",
@@ -495,7 +542,11 @@
495542
"cell_type": "code",
496543
"execution_count": null,
497544
"id": "a283f6d7-3370-41e7-a85c-c54c695f721f",
498-
"metadata": {},
545+
"metadata": {
546+
"vscode": {
547+
"languageId": "r"
548+
}
549+
},
499550
"outputs": [],
500551
"source": [
501552
"# mean-sd plot\n",
@@ -523,7 +574,11 @@
523574
"cell_type": "code",
524575
"execution_count": null,
525576
"id": "8f444ca6-b525-4e2e-9f9e-4363663b45fc",
526-
"metadata": {},
577+
"metadata": {
578+
"vscode": {
579+
"languageId": "r"
580+
}
581+
},
527582
"outputs": [],
528583
"source": [
529584
"# Regularized log-transformed values\n",
@@ -551,7 +606,11 @@
551606
"cell_type": "code",
552607
"execution_count": null,
553608
"id": "8ca292e8-27a2-4336-b047-07751cef499d",
554-
"metadata": {},
609+
"metadata": {
610+
"vscode": {
611+
"languageId": "r"
612+
}
613+
},
555614
"outputs": [],
556615
"source": [
557616
"# cor() calculates the correlation between columns of a matrix\n",
@@ -579,7 +638,11 @@
579638
"cell_type": "code",
580639
"execution_count": null,
581640
"id": "b38f5fab-4b48-4e74-9e23-793e202312dc",
582-
"metadata": {},
641+
"metadata": {
642+
"vscode": {
643+
"languageId": "r"
644+
}
645+
},
583646
"outputs": [],
584647
"source": [
585648
"P <- plotPCA(DESeq.rlog)\n",
@@ -602,7 +665,11 @@
602665
"cell_type": "code",
603666
"execution_count": null,
604667
"id": "f36513db-22f6-417f-aaa8-39ba1837ca35",
605-
"metadata": {},
668+
"metadata": {
669+
"vscode": {
670+
"languageId": "r"
671+
}
672+
},
606673
"outputs": [],
607674
"source": [
608675
"# DESeq2 uses the levels of the condition to determine the order of the comparison\n",
@@ -625,7 +692,11 @@
625692
"cell_type": "code",
626693
"execution_count": null,
627694
"id": "17827bd4-0938-49e8-9f3a-b4e979fedb3a",
628-
"metadata": {},
695+
"metadata": {
696+
"vscode": {
697+
"languageId": "r"
698+
}
699+
},
629700
"outputs": [],
630701
"source": [
631702
"#Check the results of deseq analysis\n",
@@ -646,7 +717,11 @@
646717
"cell_type": "code",
647718
"execution_count": null,
648719
"id": "39a986b0-3ee2-436a-927b-37f539e7c627",
649-
"metadata": {},
720+
"metadata": {
721+
"vscode": {
722+
"languageId": "r"
723+
}
724+
},
650725
"outputs": [],
651726
"source": [
652727
"#Histogram\n",
@@ -668,7 +743,11 @@
668743
"cell_type": "code",
669744
"execution_count": null,
670745
"id": "079404d0-d179-4bdf-8a2a-63e135b68c45",
671-
"metadata": {},
746+
"metadata": {
747+
"vscode": {
748+
"languageId": "r"
749+
}
750+
},
672751
"outputs": [],
673752
"source": [
674753
"#MA plot\n",
@@ -692,7 +771,11 @@
692771
"cell_type": "code",
693772
"execution_count": null,
694773
"id": "5bdda266-08e2-47fd-bdd8-f06cd4dd7f0c",
695-
"metadata": {},
774+
"metadata": {
775+
"vscode": {
776+
"languageId": "r"
777+
}
778+
},
696779
"outputs": [],
697780
"source": [
698781
"#HEATMAP\n",
@@ -736,7 +819,11 @@
736819
"cell_type": "code",
737820
"execution_count": null,
738821
"id": "38f62bcf-3150-417e-bebb-787763db04aa",
739-
"metadata": {},
822+
"metadata": {
823+
"vscode": {
824+
"languageId": "r"
825+
}
826+
},
740827
"outputs": [],
741828
"source": [
742829
"write.table(DGE.results.sorted, file=\"rna-seq_dge-results.txt\", sep = \"\\t\")"

0 commit comments

Comments
 (0)