|
106 | 106 | }, |
107 | 107 | { |
108 | 108 | "cell_type": "code", |
109 | | - "execution_count": 6, |
| 109 | + "execution_count": 5, |
110 | 110 | "metadata": {}, |
111 | 111 | "outputs": [], |
112 | 112 | "source": [ |
113 | 113 | "with open('track_hub/hg38/trackDb.txt', 'w') as f_track_hubs:\n", |
114 | 114 | " for tissue in TISSUES:\n", |
| 115 | + " all_tissue_genes = []\n", |
115 | 116 | " try:\n", |
116 | 117 | " for community_id in range(1, 999999):\n", |
117 | 118 | " arr_com = []\n", |
118 | 119 | " dic_community = pickle.load(open(\"svm_results/\" + tissue + '_' + str(community_id) + \".pkl\", \"rb\"))\n", |
119 | | - " len_common = len(dic_community['genes'])\n", |
120 | | - "\n", |
121 | | - " with open(f'track_hub/hg38/{tissue}_{community_id}.bed', 'w') as f:\n", |
122 | | - " for gene in dic_community['genes']:\n", |
123 | | - " if gene in dic_all_genes_info.keys():\n", |
124 | | - " gene_info = dic_all_genes_info[gene]\n", |
125 | | - " f.write(f'{gene_info[\"chr\"]}\\t{gene_info[\"chr_start\"]}\\t{gene_info[\"chr_end\"]}\\n')\n", |
126 | | - " \n", |
127 | | - " f_track_hubs.write(f'track {tissue}_{community_id}\\n')\n", |
128 | | - " f_track_hubs.write(f'bigDataUrl https://raw.githubusercontent.com/tjiagoM/gtex-transcriptome-modelling/master/track_hub/hg38/{tissue}_{community_id}.bb\\n')\n", |
129 | | - " f_track_hubs.write(f'shortLabel {tissue}_{community_id}\\n')\n", |
130 | | - " f_track_hubs.write(f'longLabel {tissue}_{community_id}\\n')\n", |
131 | | - " f_track_hubs.write(f'type bigBed\\n')\n", |
132 | | - " f_track_hubs.write(f'\\n')\n", |
133 | | - "\n", |
| 120 | + " all_tissue_genes.extend(dic_community['genes'])\n", |
134 | 121 | " except Exception as e:\n", |
135 | | - " pass" |
| 122 | + " pass\n", |
| 123 | + " \n", |
| 124 | + " # Removing duplicates\n", |
| 125 | + " all_tissue_genes = list(set(all_tissue_genes))\n", |
| 126 | + " \n", |
| 127 | + " # Saving all the tissue's genes\n", |
| 128 | + " with open(f'track_hub/hg38/{tissue}.bed', 'w') as f:\n", |
| 129 | + " for gene in all_tissue_genes:\n", |
| 130 | + " if gene in dic_all_genes_info.keys():\n", |
| 131 | + " gene_info = dic_all_genes_info[gene]\n", |
| 132 | + " f.write(f'{gene_info[\"chr\"]}\\t{gene_info[\"chr_start\"]}\\t{gene_info[\"chr_end\"]}\\n')\n", |
| 133 | + " # Some gene names use '-' in dic_all_genes_info where the community data uses '.', so retry the lookup with '.' replaced by '-'\n", |
| 134 | + " elif gene.replace('.', '-') in dic_all_genes_info.keys():\n", |
| 135 | + " gene_info = dic_all_genes_info[gene.replace('.', '-')]\n", |
| 136 | + " f.write(f'{gene_info[\"chr\"]}\\t{gene_info[\"chr_start\"]}\\t{gene_info[\"chr_end\"]}\\n')\n", |
| 137 | + "\n", |
| 138 | + " f_track_hubs.write(f'track {tissue}\\n')\n", |
| 139 | + " f_track_hubs.write(f'bigDataUrl https://raw.githubusercontent.com/tjiagoM/gtex-transcriptome-modelling/master/track_hub/hg38/{tissue}.bb\\n')\n", |
| 140 | + " f_track_hubs.write(f'shortLabel {tissue}\\n')\n", |
| 141 | + " f_track_hubs.write(f'longLabel {tissue}\\n')\n", |
| 142 | + " f_track_hubs.write(f'type bigBed\\n')\n", |
| 143 | + " f_track_hubs.write(f'visibility full\\n')\n", |
| 144 | + " f_track_hubs.write(f'\\n')" |
136 | 145 | ] |
137 | 146 | }, |
138 | 147 | { |
|
0 commit comments