|
20 | 20 | "cell_type": "code",
|
21 | 21 | "execution_count": null,
|
22 | 22 | "id": "f0e9ac26-e308-471a-83a4-113f767524b0",
|
23 |
| - "metadata": {}, |
| 23 | + "metadata": { |
| 24 | + "vscode": { |
| 25 | + "languageId": "r" |
| 26 | + } |
| 27 | + }, |
24 | 28 | "outputs": [],
|
25 | 29 | "source": [
|
26 | 30 | "IRdisplay::display_html('<iframe src = \"../../docs/quiz_files/rna-pre_module.html\" width=95% height=600></iframe>')"
|
|
218 | 222 | "cell_type": "code",
|
219 | 223 | "execution_count": null,
|
220 | 224 | "id": "6da16610-a83f-42b5-be52-f8116db3fb03",
|
221 |
| - "metadata": {}, |
| 225 | + "metadata": { |
| 226 | + "vscode": { |
| 227 | + "languageId": "r" |
| 228 | + } |
| 229 | + }, |
222 | 230 | "outputs": [],
|
223 | 231 | "source": [
|
224 | 232 | "#Install Nextflow, make it executable, and update it\n",
|
|
232 | 240 | "id": "3852204f-a42a-40a2-8b3f-4c153a9b0416",
|
233 | 241 | "metadata": {},
|
234 | 242 | "source": [
|
235 |
| - "**The size of the output data generated by Nextflow is quite large we can mitigate that by storing the temporary and output files to a bucket by setting the 'workDir' and 'params.outdir' to an existing bucket. Make sure you modify the file called rnaseq-aws.config**\n", |
| 243 | + "The output data generated by Nextflow is quite large. We can mitigate that by storing the temporary and output files in a bucket: set 'workDir' and 'params.outdir' to locations in an existing bucket. Make sure you modify the file called rnaseq-aws.config.\n", |
236 | 244 | " \n",
|
237 | 245 | "`workDir = 's3://your_bucket_name/rna-tmp'` \n",
|
238 | 246 | "`params.outdir = 's3://your_bucket_name/rna-outputs'`"
|
|
250 | 258 | "cell_type": "code",
|
251 | 259 | "execution_count": null,
|
252 | 260 | "id": "bb2e1f61-eea1-4464-9842-76fabae4c39a",
|
253 |
| - "metadata": {}, |
| 261 | + "metadata": { |
| 262 | + "vscode": { |
| 263 | + "languageId": "r" |
| 264 | + } |
| 265 | + }, |
254 | 266 | "outputs": [],
|
255 | 267 | "source": [
|
256 | 268 | "system('./nextflow run nf-core/rnaseq -c rnaseq-aws.config -profile test,aws', intern=TRUE)"
|
|
295 | 307 | "cell_type": "code",
|
296 | 308 | "execution_count": null,
|
297 | 309 | "id": "d888aacf-8f6d-427e-8646-f33a33203fd9",
|
298 |
| - "metadata": {}, |
| 310 | + "metadata": { |
| 311 | + "vscode": { |
| 312 | + "languageId": "r" |
| 313 | + } |
| 314 | + }, |
299 | 315 | "outputs": [],
|
300 | 316 | "source": [
|
301 | 317 | "system('chmod +x install_rna_seq_packages.sh' , intern=TRUE)\n",
|
|
307 | 323 | "id": "8ad88e67",
|
308 | 324 | "metadata": {},
|
309 | 325 | "source": [
|
310 |
| - "**Important**: Choose \"R-RNA-Seq\" kernel for the rest of the notebook." |
| 326 | + "**Important: Choose the \"R-RNA-Seq\" kernel for the rest of the notebook.**" |
311 | 327 | ]
|
312 | 328 | },
|
313 | 329 | {
|
314 | 330 | "cell_type": "code",
|
315 | 331 | "execution_count": null,
|
316 | 332 | "id": "a5ea70df-98ba-4a69-aeec-2baed34a72fc",
|
317 |
| - "metadata": {}, |
| 333 | + "metadata": { |
| 334 | + "vscode": { |
| 335 | + "languageId": "r" |
| 336 | + } |
| 337 | + }, |
318 | 338 | "outputs": [],
|
319 | 339 | "source": [
|
320 | 340 | "library(DESeq2)\n",
|
|
337 | 357 | "cell_type": "code",
|
338 | 358 | "execution_count": null,
|
339 | 359 | "id": "e49c7007-61b3-4057-89de-3f3fb1085800",
|
340 |
| - "metadata": {}, |
| 360 | + "metadata": { |
| 361 | + "vscode": { |
| 362 | + "languageId": "r" |
| 363 | + } |
| 364 | + }, |
341 | 365 | "outputs": [],
|
342 | 366 | "source": [
|
343 | 367 | "# download data files from storage bucket\n",
|
|
372 | 396 | "cell_type": "code",
|
373 | 397 | "execution_count": null,
|
374 | 398 | "id": "5fce0d5b-740b-4f9c-8479-fa60a0566532",
|
375 |
| - "metadata": {}, |
| 399 | + "metadata": { |
| 400 | + "vscode": { |
| 401 | + "languageId": "r" |
| 402 | + } |
| 403 | + }, |
376 | 404 | "outputs": [],
|
377 | 405 | "source": [
|
378 | 406 | "DESeq.ds <- DESeqDataSetFromMatrix(countData = round(readcounts), colData = sample_info, design = ~condition)\n"
|
|
392 | 420 | "cell_type": "code",
|
393 | 421 | "execution_count": null,
|
394 | 422 | "id": "cb7c8095-8d5c-4165-a0bf-9acfec7d97fe",
|
395 |
| - "metadata": {}, |
| 423 | + "metadata": { |
| 424 | + "vscode": { |
| 425 | + "languageId": "r" |
| 426 | + } |
| 427 | + }, |
396 | 428 | "outputs": [],
|
397 | 429 | "source": [
|
398 | 430 | "colData(DESeq.ds) %>% head\n",
|
|
417 | 449 | "cell_type": "code",
|
418 | 450 | "execution_count": null,
|
419 | 451 | "id": "4fc2ab39-8608-435b-a8f9-fb6f379be65a",
|
420 |
| - "metadata": {}, |
| 452 | + "metadata": { |
| 453 | + "vscode": { |
| 454 | + "languageId": "r" |
| 455 | + } |
| 456 | + }, |
421 | 457 | "outputs": [],
|
422 | 458 | "source": [
|
423 | 459 | "DESeq.ds <- DESeq.ds[ rowSums(counts(DESeq.ds)) > 0, ]\n",
|
|
441 | 477 | "cell_type": "code",
|
442 | 478 | "execution_count": null,
|
443 | 479 | "id": "40aa5366-4c12-4443-99a9-39ac707639d3",
|
444 |
| - "metadata": {}, |
| 480 | + "metadata": { |
| 481 | + "vscode": { |
| 482 | + "languageId": "r" |
| 483 | + } |
| 484 | + }, |
445 | 485 | "outputs": [],
|
446 | 486 | "source": [
|
447 | 487 | "# Get the size factor using estimateSizeFactors from DESeq.\n",
|
|
466 | 506 | "cell_type": "code",
|
467 | 507 | "execution_count": null,
|
468 | 508 | "id": "38523c0a-b9af-49ca-b7e8-2dbb4604fe5c",
|
469 |
| - "metadata": {}, |
| 509 | + "metadata": { |
| 510 | + "vscode": { |
| 511 | + "languageId": "r" |
| 512 | + } |
| 513 | + }, |
470 | 514 | "outputs": [],
|
471 | 515 | "source": [
|
472 | 516 | "# transform size-factor normalized read counts to log2 scale using pseudocount of 1\n",
|
|
495 | 539 | "cell_type": "code",
|
496 | 540 | "execution_count": null,
|
497 | 541 | "id": "a283f6d7-3370-41e7-a85c-c54c695f721f",
|
498 |
| - "metadata": {}, |
| 542 | + "metadata": { |
| 543 | + "vscode": { |
| 544 | + "languageId": "r" |
| 545 | + } |
| 546 | + }, |
499 | 547 | "outputs": [],
|
500 | 548 | "source": [
|
501 | 549 | "# mean-sd plot\n",
|
|
523 | 571 | "cell_type": "code",
|
524 | 572 | "execution_count": null,
|
525 | 573 | "id": "8f444ca6-b525-4e2e-9f9e-4363663b45fc",
|
526 |
| - "metadata": {}, |
| 574 | + "metadata": { |
| 575 | + "vscode": { |
| 576 | + "languageId": "r" |
| 577 | + } |
| 578 | + }, |
527 | 579 | "outputs": [],
|
528 | 580 | "source": [
|
529 | 581 | "# Regularized log-transformed values\n",
|
|
551 | 603 | "cell_type": "code",
|
552 | 604 | "execution_count": null,
|
553 | 605 | "id": "8ca292e8-27a2-4336-b047-07751cef499d",
|
554 |
| - "metadata": {}, |
| 606 | + "metadata": { |
| 607 | + "vscode": { |
| 608 | + "languageId": "r" |
| 609 | + } |
| 610 | + }, |
555 | 611 | "outputs": [],
|
556 | 612 | "source": [
|
557 | 613 | "# cor() calculates the correlation between columns of a matrix\n",
|
|
579 | 635 | "cell_type": "code",
|
580 | 636 | "execution_count": null,
|
581 | 637 | "id": "b38f5fab-4b48-4e74-9e23-793e202312dc",
|
582 |
| - "metadata": {}, |
| 638 | + "metadata": { |
| 639 | + "vscode": { |
| 640 | + "languageId": "r" |
| 641 | + } |
| 642 | + }, |
583 | 643 | "outputs": [],
|
584 | 644 | "source": [
|
585 | 645 | "P <- plotPCA(DESeq.rlog)\n",
|
|
602 | 662 | "cell_type": "code",
|
603 | 663 | "execution_count": null,
|
604 | 664 | "id": "f36513db-22f6-417f-aaa8-39ba1837ca35",
|
605 |
| - "metadata": {}, |
| 665 | + "metadata": { |
| 666 | + "vscode": { |
| 667 | + "languageId": "r" |
| 668 | + } |
| 669 | + }, |
606 | 670 | "outputs": [],
|
607 | 671 | "source": [
|
608 | 672 | "# DESeq2 uses the levels of the condition to determine the order of the comparison\n",
|
|
625 | 689 | "cell_type": "code",
|
626 | 690 | "execution_count": null,
|
627 | 691 | "id": "17827bd4-0938-49e8-9f3a-b4e979fedb3a",
|
628 |
| - "metadata": {}, |
| 692 | + "metadata": { |
| 693 | + "vscode": { |
| 694 | + "languageId": "r" |
| 695 | + } |
| 696 | + }, |
629 | 697 | "outputs": [],
|
630 | 698 | "source": [
|
631 | 699 | "#Check the results of deseq analysis\n",
|
|
646 | 714 | "cell_type": "code",
|
647 | 715 | "execution_count": null,
|
648 | 716 | "id": "39a986b0-3ee2-436a-927b-37f539e7c627",
|
649 |
| - "metadata": {}, |
| 717 | + "metadata": { |
| 718 | + "vscode": { |
| 719 | + "languageId": "r" |
| 720 | + } |
| 721 | + }, |
650 | 722 | "outputs": [],
|
651 | 723 | "source": [
|
652 | 724 | "#Histogram\n",
|
|
668 | 740 | "cell_type": "code",
|
669 | 741 | "execution_count": null,
|
670 | 742 | "id": "079404d0-d179-4bdf-8a2a-63e135b68c45",
|
671 |
| - "metadata": {}, |
| 743 | + "metadata": { |
| 744 | + "vscode": { |
| 745 | + "languageId": "r" |
| 746 | + } |
| 747 | + }, |
672 | 748 | "outputs": [],
|
673 | 749 | "source": [
|
674 | 750 | "#MA plot\n",
|
|
692 | 768 | "cell_type": "code",
|
693 | 769 | "execution_count": null,
|
694 | 770 | "id": "5bdda266-08e2-47fd-bdd8-f06cd4dd7f0c",
|
695 |
| - "metadata": {}, |
| 771 | + "metadata": { |
| 772 | + "vscode": { |
| 773 | + "languageId": "r" |
| 774 | + } |
| 775 | + }, |
696 | 776 | "outputs": [],
|
697 | 777 | "source": [
|
698 | 778 | "#HEATMAP\n",
|
|
736 | 816 | "cell_type": "code",
|
737 | 817 | "execution_count": null,
|
738 | 818 | "id": "38f62bcf-3150-417e-bebb-787763db04aa",
|
739 |
| - "metadata": {}, |
| 819 | + "metadata": { |
| 820 | + "vscode": { |
| 821 | + "languageId": "r" |
| 822 | + } |
| 823 | + }, |
740 | 824 | "outputs": [],
|
741 | 825 | "source": [
|
742 | 826 | "write.table(DGE.results.sorted, file=\"rna-seq_dge-results.txt\", sep = \"\\t\")"
|
|
0 commit comments