|
26 | 26 | }, |
27 | 27 | { |
28 | 28 | "cell_type": "code", |
29 | | - "execution_count": null, |
| 29 | + "execution_count": 4, |
30 | 30 | "id": "804f83bc", |
31 | 31 | "metadata": {}, |
32 | 32 | "outputs": [], |
|
48 | 48 | }, |
49 | 49 | { |
50 | 50 | "cell_type": "code", |
51 | | - "execution_count": 48, |
| 51 | + "execution_count": 6, |
52 | 52 | "id": "74fff8ce", |
53 | 53 | "metadata": {}, |
54 | 54 | "outputs": [], |
|
104 | 104 | "\n", |
105 | 105 | " j = np.searchsorted(peak_starts, frag_start, side='left')\n", |
106 | 106 | " while j < peak_starts.size and peak_starts[j] <= frag_end:\n", |
107 | | - " if peak_ends[j] <= frag_end:\n", |
| 107 | + " if peak_ends[j] > frag_start:\n", |
108 | 108 | " rows.append(row_id)\n", |
109 | 109 | " columns.append(int(local_peak_indices[j]))\n", |
110 | 110 | " data.append(int(frag[2]))\n", |
|
118 | 118 | }, |
119 | 119 | { |
120 | 120 | "cell_type": "code", |
121 | | - "execution_count": 49, |
| 121 | + "execution_count": 7, |
122 | 122 | "id": "744ba5ee", |
123 | 123 | "metadata": {}, |
124 | 124 | "outputs": [ |
|
128 | 128 | "text": [ |
129 | 129 | "AnnData object with n_obs × n_vars = 3 × 3\n", |
130 | 130 | " var: 'chrom', 'start', 'end'\n", |
131 | | - "[[2 0 0]\n", |
| 131 | + "[[2 1 0]\n", |
132 | 132 | " [3 0 0]\n", |
133 | 133 | " [0 4 0]]\n" |
134 | 134 | ] |
|
142 | 142 | }, |
143 | 143 | { |
144 | 144 | "cell_type": "code", |
145 | | - "execution_count": 51, |
| 145 | + "execution_count": 8, |
146 | 146 | "id": "a83976eb", |
147 | 147 | "metadata": {}, |
148 | 148 | "outputs": [], |
|
204 | 204 | " idx = np.arange(len(frag_starts), dtype=np.int64)\n", |
205 | 205 | " ncls = NCLS(frag_starts, frag_ends, idx)\n", |
206 | 206 | "\n", |
207 | | - " # Query peaks: peak must be inside fragment\n", |
| 207 | + " # Query overlaps: count any fragment that overlaps the peak\n", |
208 | 208 | " for peak_idx, (peak_start, peak_end) in enumerate(regions_c):\n", |
209 | 209 | " for frag_start, frag_end, frag_i in ncls.find_overlap(peak_start, peak_end):\n", |
210 | | - " # enforce peak inside fragment\n", |
211 | | - " if frag_start <= peak_start and peak_end <= frag_end:\n", |
212 | | - " row_id = frag_rows[frag_i]\n", |
213 | | - " if row_id >= 0:\n", |
214 | | - " rows.append(row_id)\n", |
215 | | - " cols.append(local_peak_indices[peak_idx])\n", |
216 | | - " data.append(int(frag_counts[frag_i]))\n", |
| 210 | + " row_id = frag_rows[frag_i]\n", |
| 211 | + " if row_id >= 0:\n", |
| 212 | + " rows.append(row_id)\n", |
| 213 | + " cols.append(local_peak_indices[peak_idx])\n", |
| 214 | + " data.append(int(frag_counts[frag_i]))\n", |
217 | 215 | " \n", |
218 | 216 | " X = sparse.coo_matrix((data, (rows, cols)),\n", |
219 | 217 | " shape=(n_cells, peak_counter),\n", |
220 | 218 | " dtype=np.int32).tocsr()\n", |
221 | 219 | "\n", |
222 | 220 | " obs = pd.DataFrame(index=barcodes)\n", |
223 | 221 | " var = pd.DataFrame(peak_data, columns=[\"chrom\", \"start\", \"end\"], index=peak_names)\n", |
224 | | - " return ad.AnnData(X=X, obs=obs, var=var)" |
| 222 | + " return ad.AnnData(X=X, obs=obs, var=var)\n" |
225 | 223 | ] |
226 | 224 | }, |
227 | 225 | { |
228 | 226 | "cell_type": "code", |
229 | | - "execution_count": 52, |
| 227 | + "execution_count": 9, |
230 | 228 | "id": "b7dcf943", |
231 | 229 | "metadata": {}, |
232 | 230 | "outputs": [ |
|
236 | 234 | "text": [ |
237 | 235 | "AnnData object with n_obs × n_vars = 3 × 3\n", |
238 | 236 | " var: 'chrom', 'start', 'end'\n", |
239 | | - "[[2 0 0]\n", |
| 237 | + "[[3 1 0]\n", |
240 | 238 | " [3 0 0]\n", |
241 | 239 | " [0 4 0]]\n" |
242 | 240 | ] |
|
250 | 248 | }, |
251 | 249 | { |
252 | 250 | "cell_type": "code", |
253 | | - "execution_count": null, |
| 251 | + "execution_count": 10, |
254 | 252 | "id": "b6951c0d", |
255 | 253 | "metadata": {}, |
256 | 254 | "outputs": [], |
|
376 | 374 | }, |
377 | 375 | { |
378 | 376 | "cell_type": "code", |
379 | | - "execution_count": 54, |
| 377 | + "execution_count": 11, |
380 | 378 | "id": "2e4bd05d", |
381 | 379 | "metadata": {}, |
382 | 380 | "outputs": [ |
383 | 381 | { |
384 | 382 | "name": "stdout", |
385 | 383 | "output_type": "stream", |
386 | 384 | "text": [ |
387 | | - "peak 0 overlaps with fragment 0, row_id: 0, count: 2\n", |
388 | | - "peak 0 overlaps with fragment 1, row_id: 1, count: 3\n", |
389 | | - "peak 0 overlaps with fragment 2, row_id: 0, count: 1\n", |
390 | | - "peak 1 overlaps with fragment 2, row_id: 0, count: 1\n", |
391 | | - "peak 1 overlaps with fragment 3, row_id: 2, count: 4\n", |
392 | | - "remaining fragments: -1, remaining peaks: 1\n", |
393 | 385 | "AnnData object with n_obs × n_vars = 3 × 3\n", |
394 | 386 | " var: 'chrom', 'start', 'end'\n", |
395 | 387 | "[[3 1 0]\n", |
|
406 | 398 | }, |
407 | 399 | { |
408 | 400 | "cell_type": "code", |
409 | | - "execution_count": 17, |
| 401 | + "execution_count": 12, |
410 | 402 | "id": "3f01629f", |
411 | 403 | "metadata": {}, |
412 | 404 | "outputs": [ |
| 405 | + { |
| 406 | + "name": "stdout", |
| 407 | + "output_type": "stream", |
| 408 | + "text": [ |
| 409 | + "using ncls\n" |
| 410 | + ] |
| 411 | + }, |
413 | 412 | { |
414 | 413 | "data": { |
415 | 414 | "text/plain": [ |
416 | | - "array([[2, 0, 0],\n", |
| 415 | + "array([[3, 1, 0],\n", |
417 | 416 | " [3, 0, 0],\n", |
418 | 417 | " [0, 4, 0]], dtype=int32)" |
419 | 418 | ] |
420 | 419 | }, |
421 | | - "execution_count": 17, |
| 420 | + "execution_count": 12, |
422 | 421 | "metadata": {}, |
423 | 422 | "output_type": "execute_result" |
424 | 423 | } |
|
441 | 440 | ], |
442 | 441 | "metadata": { |
443 | 442 | "kernelspec": { |
444 | | - "display_name": ".venv (3.12.8)", |
| 443 | + "display_name": ".venv (3.9.6)", |
445 | 444 | "language": "python", |
446 | 445 | "name": "python3" |
447 | 446 | }, |
|
455 | 454 | "name": "python", |
456 | 455 | "nbconvert_exporter": "python", |
457 | 456 | "pygments_lexer": "ipython3", |
458 | | - "version": "3.12.8" |
| 457 | + "version": "3.9.6" |
459 | 458 | } |
460 | 459 | }, |
461 | 460 | "nbformat": 4, |
|
0 commit comments