Skip to content

Commit 2fca7e8

Browse files
Changed overlap logic.
1 parent e777219 commit 2fca7e8

2 files changed

Lines changed: 36 additions & 38 deletions

File tree

MACS3/Signal/PairedEndTrack.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1745,7 +1745,7 @@ def _np_sort_search(self,regions):
17451745

17461746
j = np.searchsorted(peak_starts, frag_start, side='left')
17471747
while j < peak_starts.size and peak_starts[j] <= frag_end:
1748-
if peak_ends[j] <= frag_end:
1748+
if peak_ends[j] > frag_start:
17491749
rows.append(row_id)
17501750
columns.append(int(local_peak_indices[j]))
17511751
data.append(int(frag[2]))
@@ -1814,13 +1814,12 @@ def _anndata_ncls(self, regions):
18141814
# Query peaks: peak must be inside fragment
18151815
for peak_idx, (peak_start, peak_end) in enumerate(regions_c):
18161816
for frag_start, frag_end, frag_i in ncls.find_overlap(peak_start, peak_end):
1817-
# enforce peak inside fragment
1818-
if frag_start <= peak_start and peak_end <= frag_end:
1819-
row_id = frag_rows[frag_i]
1820-
if row_id >= 0:
1821-
rows.append(row_id)
1822-
cols.append(local_peak_indices[peak_idx])
1823-
data.append(int(frag_counts[frag_i]))
1817+
# if frag_start <= peak_start and peak_end <= frag_end:
1818+
row_id = frag_rows[frag_i]
1819+
if row_id >= 0:
1820+
rows.append(row_id)
1821+
cols.append(local_peak_indices[peak_idx])
1822+
data.append(int(frag_counts[frag_i]))
18241823

18251824
X = sparse.coo_matrix((data, (rows, cols)),
18261825
shape=(n_cells, peak_counter),

notebooks/500-PBMC-test/anndata_implemetation_dev.ipynb

Lines changed: 29 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
{
2828
"cell_type": "code",
29-
"execution_count": null,
29+
"execution_count": 4,
3030
"id": "804f83bc",
3131
"metadata": {},
3232
"outputs": [],
@@ -48,7 +48,7 @@
4848
},
4949
{
5050
"cell_type": "code",
51-
"execution_count": 48,
51+
"execution_count": 6,
5252
"id": "74fff8ce",
5353
"metadata": {},
5454
"outputs": [],
@@ -104,7 +104,7 @@
104104
"\n",
105105
" j = np.searchsorted(peak_starts, frag_start, side='left')\n",
106106
" while j < peak_starts.size and peak_starts[j] <= frag_end:\n",
107-
" if peak_ends[j] <= frag_end:\n",
107+
" if peak_ends[j] > frag_start:\n",
108108
" rows.append(row_id)\n",
109109
" columns.append(int(local_peak_indices[j]))\n",
110110
" data.append(int(frag[2]))\n",
@@ -118,7 +118,7 @@
118118
},
119119
{
120120
"cell_type": "code",
121-
"execution_count": 49,
121+
"execution_count": 7,
122122
"id": "744ba5ee",
123123
"metadata": {},
124124
"outputs": [
@@ -128,7 +128,7 @@
128128
"text": [
129129
"AnnData object with n_obs × n_vars = 3 × 3\n",
130130
" var: 'chrom', 'start', 'end'\n",
131-
"[[2 0 0]\n",
131+
"[[2 1 0]\n",
132132
" [3 0 0]\n",
133133
" [0 4 0]]\n"
134134
]
@@ -142,7 +142,7 @@
142142
},
143143
{
144144
"cell_type": "code",
145-
"execution_count": 51,
145+
"execution_count": 8,
146146
"id": "a83976eb",
147147
"metadata": {},
148148
"outputs": [],
@@ -204,29 +204,27 @@
204204
" idx = np.arange(len(frag_starts), dtype=np.int64)\n",
205205
" ncls = NCLS(frag_starts, frag_ends, idx)\n",
206206
"\n",
207-
" # Query peaks: peak must be inside fragment\n",
207+
" # Query overlaps: count any fragment that overlaps the peak\n",
208208
" for peak_idx, (peak_start, peak_end) in enumerate(regions_c):\n",
209209
" for frag_start, frag_end, frag_i in ncls.find_overlap(peak_start, peak_end):\n",
210-
" # enforce peak inside fragment\n",
211-
" if frag_start <= peak_start and peak_end <= frag_end:\n",
212-
" row_id = frag_rows[frag_i]\n",
213-
" if row_id >= 0:\n",
214-
" rows.append(row_id)\n",
215-
" cols.append(local_peak_indices[peak_idx])\n",
216-
" data.append(int(frag_counts[frag_i]))\n",
210+
" row_id = frag_rows[frag_i]\n",
211+
" if row_id >= 0:\n",
212+
" rows.append(row_id)\n",
213+
" cols.append(local_peak_indices[peak_idx])\n",
214+
" data.append(int(frag_counts[frag_i]))\n",
217215
" \n",
218216
" X = sparse.coo_matrix((data, (rows, cols)),\n",
219217
" shape=(n_cells, peak_counter),\n",
220218
" dtype=np.int32).tocsr()\n",
221219
"\n",
222220
" obs = pd.DataFrame(index=barcodes)\n",
223221
" var = pd.DataFrame(peak_data, columns=[\"chrom\", \"start\", \"end\"], index=peak_names)\n",
224-
" return ad.AnnData(X=X, obs=obs, var=var)"
222+
" return ad.AnnData(X=X, obs=obs, var=var)\n"
225223
]
226224
},
227225
{
228226
"cell_type": "code",
229-
"execution_count": 52,
227+
"execution_count": 9,
230228
"id": "b7dcf943",
231229
"metadata": {},
232230
"outputs": [
@@ -236,7 +234,7 @@
236234
"text": [
237235
"AnnData object with n_obs × n_vars = 3 × 3\n",
238236
" var: 'chrom', 'start', 'end'\n",
239-
"[[2 0 0]\n",
237+
"[[3 1 0]\n",
240238
" [3 0 0]\n",
241239
" [0 4 0]]\n"
242240
]
@@ -250,7 +248,7 @@
250248
},
251249
{
252250
"cell_type": "code",
253-
"execution_count": null,
251+
"execution_count": 10,
254252
"id": "b6951c0d",
255253
"metadata": {},
256254
"outputs": [],
@@ -376,20 +374,14 @@
376374
},
377375
{
378376
"cell_type": "code",
379-
"execution_count": 54,
377+
"execution_count": 11,
380378
"id": "2e4bd05d",
381379
"metadata": {},
382380
"outputs": [
383381
{
384382
"name": "stdout",
385383
"output_type": "stream",
386384
"text": [
387-
"peak 0 overlaps with fragment 0, row_id: 0, count: 2\n",
388-
"peak 0 overlaps with fragment 1, row_id: 1, count: 3\n",
389-
"peak 0 overlaps with fragment 2, row_id: 0, count: 1\n",
390-
"peak 1 overlaps with fragment 2, row_id: 0, count: 1\n",
391-
"peak 1 overlaps with fragment 3, row_id: 2, count: 4\n",
392-
"remaining fragments: -1, remaining peaks: 1\n",
393385
"AnnData object with n_obs × n_vars = 3 × 3\n",
394386
" var: 'chrom', 'start', 'end'\n",
395387
"[[3 1 0]\n",
@@ -406,19 +398,26 @@
406398
},
407399
{
408400
"cell_type": "code",
409-
"execution_count": 17,
401+
"execution_count": 12,
410402
"id": "3f01629f",
411403
"metadata": {},
412404
"outputs": [
405+
{
406+
"name": "stdout",
407+
"output_type": "stream",
408+
"text": [
409+
"using ncls\n"
410+
]
411+
},
413412
{
414413
"data": {
415414
"text/plain": [
416-
"array([[2, 0, 0],\n",
415+
"array([[3, 1, 0],\n",
417416
" [3, 0, 0],\n",
418417
" [0, 4, 0]], dtype=int32)"
419418
]
420419
},
421-
"execution_count": 17,
420+
"execution_count": 12,
422421
"metadata": {},
423422
"output_type": "execute_result"
424423
}
@@ -441,7 +440,7 @@
441440
],
442441
"metadata": {
443442
"kernelspec": {
444-
"display_name": ".venv (3.12.8)",
443+
"display_name": ".venv (3.9.6)",
445444
"language": "python",
446445
"name": "python3"
447446
},
@@ -455,7 +454,7 @@
455454
"name": "python",
456455
"nbconvert_exporter": "python",
457456
"pygments_lexer": "ipython3",
458-
"version": "3.12.8"
457+
"version": "3.9.6"
459458
}
460459
},
461460
"nbformat": 4,

0 commit comments

Comments
 (0)