Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
class EmptyCorpusException(Exception):
    """Raised when the user tries to initialize a retrieval algorithm with an empty corpus.

    When the exception is created without arguments (for example via a bare
    ``raise EmptyCorpusException``), ``DEFAULT_MESSAGE`` is used so that the
    traceback explains what went wrong. Any arguments that are passed
    explicitly are forwarded to ``Exception`` unchanged.
    """

    # Message used when the caller supplies no arguments of their own.
    DEFAULT_MESSAGE = "Cannot initialize a retrieval algorithm with an empty corpus"

    def __init__(self, *args):
        # An empty ``args`` tuple is falsy, so the default message is only
        # substituted when nothing was passed in.
        super().__init__(*(args or (self.DEFAULT_MESSAGE,)))
6 changes: 5 additions & 1 deletion rank_bm25.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import numpy as np
from multiprocessing import Pool, cpu_count

from exceptions import EmptyCorpusException

"""
All of these algorithms have been taken from the paper:
Trotman et al., Improvements to BM25 and Language Models Examined
Expand All @@ -28,6 +30,8 @@ def __init__(self, corpus, tokenizer=None):
self._calc_idf(nd)

def _initialize(self, corpus):
if len(corpus) == 0:
raise EmptyCorpusException
nd = {} # word -> number of documents with word
num_doc = 0
for document in corpus:
Expand All @@ -43,7 +47,7 @@ def _initialize(self, corpus):

for word, freq in frequencies.items():
try:
nd[word]+=1
nd[word] += 1
except KeyError:
nd[word] = 1

Expand Down
15 changes: 15 additions & 0 deletions tests/test_exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys
import os
myPath = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, myPath + '/../')

import pytest

from rank_bm25 import BM25
from exceptions import EmptyCorpusException


def test_empty_corpus():
    """Check that building BM25 from an empty corpus raises EmptyCorpusException."""
    no_documents = []
    with pytest.raises(EmptyCorpusException):
        BM25(no_documents)