|
20 | 20 | "cell_type": "code",
|
21 | 21 | "execution_count": null,
|
22 | 22 | "id": "1c2d28e0",
|
23 |
| - "metadata": {}, |
| 23 | + "metadata": { |
| 24 | + "vscode": { |
| 25 | + "languageId": "r" |
| 26 | + } |
| 27 | + }, |
24 | 28 | "outputs": [],
|
25 | 29 | "source": [
|
26 | 30 | "IRdisplay::display_html('<iframe src = \"../../quiz_files/rna-pre_module.html\" width=95% height=600></iframe>')"
|
|
201 | 205 | "cell_type": "code",
|
202 | 206 | "execution_count": null,
|
203 | 207 | "id": "95cda0d2",
|
204 |
| - "metadata": {}, |
| 208 | + "metadata": { |
| 209 | + "vscode": { |
| 210 | + "languageId": "r" |
| 211 | + } |
| 212 | + }, |
205 | 213 | "outputs": [],
|
206 | 214 | "source": [
|
207 | 215 | "system('chmod +x install_rna_seq_packages.sh' , intern=TRUE)\n",
|
|
245 | 253 | "cell_type": "code",
|
246 | 254 | "execution_count": null,
|
247 | 255 | "id": "6da16610-a83f-42b5-be52-f8116db3fb03",
|
248 |
| - "metadata": {}, |
| 256 | + "metadata": { |
| 257 | + "vscode": { |
| 258 | + "languageId": "r" |
| 259 | + } |
| 260 | + }, |
249 | 261 | "outputs": [],
|
250 | 262 | "source": [
|
251 | 263 | "#Install Nextflow, make it executable, and update it\n",
|
|
260 | 272 | "metadata": {},
|
261 | 273 | "source": [
|
262 | 274 | "The output data generated by Nextflow is quite large. We can mitigate that by storing the temporary and output files in a bucket, which is done by setting 'workDir' and 'params.outdir' to locations in an existing bucket. Make sure you modify the file called rnaseq-aws.config.\n",
|
263 |
| - " \n", |
264 |
| - "`workDir = 's3://your_bucket_name/rna-tmp'` \n", |
265 |
| - "`params.outdir = 's3://your_bucket_name/rna-outputs'`" |
| 275 | + "```\n", |
| 276 | + "aws.region = 'us-east-1' // YOUR AWS REGION\n", |
| 277 | + "queue = 'nextflow-batch-job-queue' // Name of your Job queue \n", |
| 278 | + "workDir = 's3://your_bucket_name/rna-tmp' \n", |
| 279 | + "params.outdir = 's3://your_bucket_name/rna-outputs'\n", |
| 280 | + "```" |
266 | 281 | ]
|
267 | 282 | },
|
268 | 283 | {
|
|
277 | 292 | "cell_type": "code",
|
278 | 293 | "execution_count": null,
|
279 | 294 | "id": "bb2e1f61-eea1-4464-9842-76fabae4c39a",
|
280 |
| - "metadata": {}, |
| 295 | + "metadata": { |
| 296 | + "vscode": { |
| 297 | + "languageId": "r" |
| 298 | + } |
| 299 | + }, |
281 | 300 | "outputs": [],
|
282 | 301 | "source": [
|
283 | 302 | "system('./nextflow run nf-core/rnaseq -c rnaseq-aws.config -profile test,aws', intern=TRUE)"
|
|
314 | 333 | "cell_type": "code",
|
315 | 334 | "execution_count": null,
|
316 | 335 | "id": "a5ea70df-98ba-4a69-aeec-2baed34a72fc",
|
317 |
| - "metadata": {}, |
| 336 | + "metadata": { |
| 337 | + "vscode": { |
| 338 | + "languageId": "r" |
| 339 | + } |
| 340 | + }, |
318 | 341 | "outputs": [],
|
319 | 342 | "source": [
|
320 | 343 | "library(DESeq2)\n",
|
|
337 | 360 | "cell_type": "code",
|
338 | 361 | "execution_count": null,
|
339 | 362 | "id": "e49c7007-61b3-4057-89de-3f3fb1085800",
|
340 |
| - "metadata": {}, |
| 363 | + "metadata": { |
| 364 | + "vscode": { |
| 365 | + "languageId": "r" |
| 366 | + } |
| 367 | + }, |
341 | 368 | "outputs": [],
|
342 | 369 | "source": [
|
343 | 370 | "# download data files from storage bucket\n",
|
|
372 | 399 | "cell_type": "code",
|
373 | 400 | "execution_count": null,
|
374 | 401 | "id": "5fce0d5b-740b-4f9c-8479-fa60a0566532",
|
375 |
| - "metadata": {}, |
| 402 | + "metadata": { |
| 403 | + "vscode": { |
| 404 | + "languageId": "r" |
| 405 | + } |
| 406 | + }, |
376 | 407 | "outputs": [],
|
377 | 408 | "source": [
|
378 | 409 | "DESeq.ds <- DESeqDataSetFromMatrix(countData = round(readcounts), colData = sample_info, design = ~condition)\n"
|
|
392 | 423 | "cell_type": "code",
|
393 | 424 | "execution_count": null,
|
394 | 425 | "id": "cb7c8095-8d5c-4165-a0bf-9acfec7d97fe",
|
395 |
| - "metadata": {}, |
| 426 | + "metadata": { |
| 427 | + "vscode": { |
| 428 | + "languageId": "r" |
| 429 | + } |
| 430 | + }, |
396 | 431 | "outputs": [],
|
397 | 432 | "source": [
|
398 | 433 | "colData(DESeq.ds) %>% head\n",
|
|
417 | 452 | "cell_type": "code",
|
418 | 453 | "execution_count": null,
|
419 | 454 | "id": "4fc2ab39-8608-435b-a8f9-fb6f379be65a",
|
420 |
| - "metadata": {}, |
| 455 | + "metadata": { |
| 456 | + "vscode": { |
| 457 | + "languageId": "r" |
| 458 | + } |
| 459 | + }, |
421 | 460 | "outputs": [],
|
422 | 461 | "source": [
|
423 | 462 | "DESeq.ds <- DESeq.ds[ rowSums(counts(DESeq.ds)) > 0, ]\n",
|
|
441 | 480 | "cell_type": "code",
|
442 | 481 | "execution_count": null,
|
443 | 482 | "id": "40aa5366-4c12-4443-99a9-39ac707639d3",
|
444 |
| - "metadata": {}, |
| 483 | + "metadata": { |
| 484 | + "vscode": { |
| 485 | + "languageId": "r" |
| 486 | + } |
| 487 | + }, |
445 | 488 | "outputs": [],
|
446 | 489 | "source": [
|
447 | 490 | "# Get the size factor using estimateSizeFactors from DESeq.\n",
|
|
466 | 509 | "cell_type": "code",
|
467 | 510 | "execution_count": null,
|
468 | 511 | "id": "38523c0a-b9af-49ca-b7e8-2dbb4604fe5c",
|
469 |
| - "metadata": {}, |
| 512 | + "metadata": { |
| 513 | + "vscode": { |
| 514 | + "languageId": "r" |
| 515 | + } |
| 516 | + }, |
470 | 517 | "outputs": [],
|
471 | 518 | "source": [
|
472 | 519 | "# transform size-factor normalized read counts to log2 scale using pseudocount of 1\n",
|
|
495 | 542 | "cell_type": "code",
|
496 | 543 | "execution_count": null,
|
497 | 544 | "id": "a283f6d7-3370-41e7-a85c-c54c695f721f",
|
498 |
| - "metadata": {}, |
| 545 | + "metadata": { |
| 546 | + "vscode": { |
| 547 | + "languageId": "r" |
| 548 | + } |
| 549 | + }, |
499 | 550 | "outputs": [],
|
500 | 551 | "source": [
|
501 | 552 | "# mean-sd plot\n",
|
|
523 | 574 | "cell_type": "code",
|
524 | 575 | "execution_count": null,
|
525 | 576 | "id": "8f444ca6-b525-4e2e-9f9e-4363663b45fc",
|
526 |
| - "metadata": {}, |
| 577 | + "metadata": { |
| 578 | + "vscode": { |
| 579 | + "languageId": "r" |
| 580 | + } |
| 581 | + }, |
527 | 582 | "outputs": [],
|
528 | 583 | "source": [
|
529 | 584 | "# Regularized log-transformed values\n",
|
|
551 | 606 | "cell_type": "code",
|
552 | 607 | "execution_count": null,
|
553 | 608 | "id": "8ca292e8-27a2-4336-b047-07751cef499d",
|
554 |
| - "metadata": {}, |
| 609 | + "metadata": { |
| 610 | + "vscode": { |
| 611 | + "languageId": "r" |
| 612 | + } |
| 613 | + }, |
555 | 614 | "outputs": [],
|
556 | 615 | "source": [
|
557 | 616 | "# cor() calculates the correlation between columns of a matrix\n",
|
|
579 | 638 | "cell_type": "code",
|
580 | 639 | "execution_count": null,
|
581 | 640 | "id": "b38f5fab-4b48-4e74-9e23-793e202312dc",
|
582 |
| - "metadata": {}, |
| 641 | + "metadata": { |
| 642 | + "vscode": { |
| 643 | + "languageId": "r" |
| 644 | + } |
| 645 | + }, |
583 | 646 | "outputs": [],
|
584 | 647 | "source": [
|
585 | 648 | "P <- plotPCA(DESeq.rlog)\n",
|
|
602 | 665 | "cell_type": "code",
|
603 | 666 | "execution_count": null,
|
604 | 667 | "id": "f36513db-22f6-417f-aaa8-39ba1837ca35",
|
605 |
| - "metadata": {}, |
| 668 | + "metadata": { |
| 669 | + "vscode": { |
| 670 | + "languageId": "r" |
| 671 | + } |
| 672 | + }, |
606 | 673 | "outputs": [],
|
607 | 674 | "source": [
|
608 | 675 | "# DESeq2 uses the levels of the condition to determine the order of the comparison\n",
|
|
625 | 692 | "cell_type": "code",
|
626 | 693 | "execution_count": null,
|
627 | 694 | "id": "17827bd4-0938-49e8-9f3a-b4e979fedb3a",
|
628 |
| - "metadata": {}, |
| 695 | + "metadata": { |
| 696 | + "vscode": { |
| 697 | + "languageId": "r" |
| 698 | + } |
| 699 | + }, |
629 | 700 | "outputs": [],
|
630 | 701 | "source": [
|
631 | 702 | "#Check the results of deseq analysis\n",
|
|
646 | 717 | "cell_type": "code",
|
647 | 718 | "execution_count": null,
|
648 | 719 | "id": "39a986b0-3ee2-436a-927b-37f539e7c627",
|
649 |
| - "metadata": {}, |
| 720 | + "metadata": { |
| 721 | + "vscode": { |
| 722 | + "languageId": "r" |
| 723 | + } |
| 724 | + }, |
650 | 725 | "outputs": [],
|
651 | 726 | "source": [
|
652 | 727 | "#Histogram\n",
|
|
668 | 743 | "cell_type": "code",
|
669 | 744 | "execution_count": null,
|
670 | 745 | "id": "079404d0-d179-4bdf-8a2a-63e135b68c45",
|
671 |
| - "metadata": {}, |
| 746 | + "metadata": { |
| 747 | + "vscode": { |
| 748 | + "languageId": "r" |
| 749 | + } |
| 750 | + }, |
672 | 751 | "outputs": [],
|
673 | 752 | "source": [
|
674 | 753 | "#MA plot\n",
|
|
692 | 771 | "cell_type": "code",
|
693 | 772 | "execution_count": null,
|
694 | 773 | "id": "5bdda266-08e2-47fd-bdd8-f06cd4dd7f0c",
|
695 |
| - "metadata": {}, |
| 774 | + "metadata": { |
| 775 | + "vscode": { |
| 776 | + "languageId": "r" |
| 777 | + } |
| 778 | + }, |
696 | 779 | "outputs": [],
|
697 | 780 | "source": [
|
698 | 781 | "#HEATMAP\n",
|
|
736 | 819 | "cell_type": "code",
|
737 | 820 | "execution_count": null,
|
738 | 821 | "id": "38f62bcf-3150-417e-bebb-787763db04aa",
|
739 |
| - "metadata": {}, |
| 822 | + "metadata": { |
| 823 | + "vscode": { |
| 824 | + "languageId": "r" |
| 825 | + } |
| 826 | + }, |
740 | 827 | "outputs": [],
|
741 | 828 | "source": [
|
742 | 829 | "write.table(DGE.results.sorted, file=\"rna-seq_dge-results.txt\", sep = \"\\t\")"
|
|
0 commit comments