magnet.utils package

Subpackages

Submodules

magnet.utils.data_classes module

class magnet.utils.data_classes.AcquireParams(resource_id: str, data_source: str, location: str, acquisition_options: Dict[str, Any])[source]

Bases: object

acquisition_options: Dict[str, Any]
data_source: str
location: str
resource_id: str
class magnet.utils.data_classes.AskParameters(m: str = 'mistralai/Mistral-7B-Instruct-v0.1', q: str = 'What is your itinerary?', t: float = 1.0, n: int = 8096, p: str = 'qa_ref', cb: Optional[Callable] = None, docs: List[str] = <factory>, context: str = '', vllm: bool = False)[source]

Bases: object

cb: Callable | None = None
context: str = ''
docs: List[str]
m: str = 'mistralai/Mistral-7B-Instruct-v0.1'
n: int = 8096
p: str = 'qa_ref'
q: str = 'What is your itinerary?'
t: float = 1.0
vllm: bool = False
class magnet.utils.data_classes.EmbeddingPayload(document: str, embedding: list, content: list, model: str)[source]

Bases: object

Represents a payload for embedding text data.

Attributes:

document (str): The document associated with the text data.
embedding (list): The embedding of the text data.
content (list): The text content of the data.
model (str): The model used for embedding the text data.

content: list
document: str
embedding: list
model: str
class magnet.utils.data_classes.FilePayload(data: bytes, original_filename: str, _id: str)[source]

Bases: object

Represents a payload with three main fields: data, original_filename, and _id.

Args:

data (bytes): The raw bytes associated with the payload.
original_filename (str): The original filename of the uploaded data.
_id (str): The document associated with the payload.

data: bytes
original_filename: str
class magnet.utils.data_classes.GeneratedPayload(query: str, prompt: str, context: list, result: str, model: str)[source]

Bases: object

Represents a payload generated by a system.

Args:

query (str): The query associated with the payload. prompt (str): The prompt associated with the payload. context (list): The context associated with the payload. result (str): The result generated by the system. model (str): The model used to generate the payload.

context: list
model: str
prompt: str
query: str
result: str
class magnet.utils.data_classes.IndexConfig(milvus_uri: Optional[str] = None, milvus_port: Optional[int] = None, milvus_user: Optional[str] = None, milvus_password: Optional[str] = None, dimension: Optional[int] = None, model: Optional[str] = None, name: Optional[str] = None, options: Dict[Optional[dict], Any] = <factory>)[source]

Bases: object

dimension: int | None = None
milvus_password: str | None = None
milvus_port: int | None = None
milvus_uri: str | None = None
milvus_user: str | None = None
model: str | None = None
name: str | None = None
options: Dict[dict | None, Any]
class magnet.utils.data_classes.InferenceParams(resource_id: str, location: str, data_source: str, model_id: str, inference_options: Dict[str, Any])[source]

Bases: object

data_source: str
inference_options: Dict[str, Any]
location: str
model_id: str
resource_id: str
class magnet.utils.data_classes.Job(params: magnet.utils.data_classes.AcquireParams | magnet.utils.data_classes.TrainParams | magnet.utils.data_classes.InferenceParams | magnet.utils.data_classes.ProcessParams, _type: str, _id: str, _isClaimed: bool = False)[source]

Bases: object

params: AcquireParams | TrainParams | InferenceParams | ProcessParams
class magnet.utils.data_classes.JobParams(milvus_host: str, milvus_port: int, milvus_username: str, milvus_password: str, milvus_collection: str, nats_host: str, nats_username: str, nats_password: str, nats_stream: str, nats_category: str, job_type: str, job_n: int, embedding_model: str, generation_model: str)[source]

Bases: object

embedding_model: str
generation_model: str
job_n: int
job_type: str
milvus_collection: str
milvus_host: str
milvus_password: str
milvus_port: int
milvus_username: str
nats_category: str
nats_host: str
nats_password: str
nats_stream: str
nats_username: str
class magnet.utils.data_classes.MagnetConfig(host: str, domain: str = None, credentials: str = None, session: str = None, stream_name: str = None, category: str = None, kv_name: str = None, os_name: str = None, index: magnet.utils.data_classes.IndexConfig | None = None)[source]

Bases: object

category: str = None
credentials: str = None
domain: str = None
host: str
index: IndexConfig | None = None
kv_name: str = None
os_name: str = None
session: str = None
stream_name: str = None
class magnet.utils.data_classes.MistralArgs(dim: int, n_layers: int, head_dim: int, hidden_dim: int, n_heads: int, n_kv_heads: int, norm_eps: float, vocab_size: int)[source]

Bases: object

Represents a set of arguments for the Mistral model.

Args:

dim (int): The dimensionality of the model. n_layers (int): The number of layers in the model. head_dim (int): The dimensionality of each attention head. hidden_dim (int): The dimensionality of the hidden layer in the feed-forward network. n_heads (int): The number of attention heads. n_kv_heads (int): The number of attention heads used for key-value attention. norm_eps (float): The epsilon value used for numerical stability in layer normalization. vocab_size (int): The size of the vocabulary used in the model.

dim: int
head_dim: int
hidden_dim: int
n_heads: int
n_kv_heads: int
n_layers: int
norm_eps: float
vocab_size: int
class magnet.utils.data_classes.Payload(content: object | list | str | dict | ndarray, _id: str)[source]

Bases: object

Represents a payload with two main fields: content and _id.

Args:

content (object | list | str | dict | ndarray): The information associated with the payload.
_id (str): The id associated with the payload.

content: object | list | str | dict | ndarray
class magnet.utils.data_classes.ProcessParams(resource_id: str, location: str, model: str, data_source: str, processing_options: Dict[str, Any])[source]

Bases: object

data_source: str
location: str
model: str
processing_options: Dict[str, Any]
resource_id: str
class magnet.utils.data_classes.Run(_id: str, _job: magnet.utils.data_classes.Job, _type: str, start_time: str, status: str | None = None, end_time: str | None = None, results: Dict[str, Any] | None = None, metrics: Dict[str, Any] | None = None)[source]

Bases: object

end_time: str | None = None
metrics: Dict[str, Any] | None = None
results: Dict[str, Any] | None = None
start_time: str
status: str | None = None
class magnet.utils.data_classes.Status(timestamp: datetime.datetime, type: str, content: str)[source]

Bases: object

content: str
timestamp: datetime
type: str
class magnet.utils.data_classes.TrainParams(resource_id: str, data_source: str, model: str, training_options: Dict[str, Any])[source]

Bases: object

data_source: str
model: str
resource_id: str
training_options: Dict[str, Any]

magnet.utils.globals module

class magnet.utils.globals.Utils[source]

Bases: object

The Utils class provides various utility functions for tasks such as checking CUDA availability, normalizing text, and uploading files to Amazon S3.

Example Usage:

# Create an instance of the Utils class
utils = Utils()

# Check if CUDA is available
utils.check_cuda()

# Normalize a text string
normalized_text = utils.normalize_text(" This is a sample text. ")

# Upload a file to Amazon S3
utils.upload_to_s3(file_or_dir="path/to/file.txt", keys=("YOUR_ACCESS_KEY", "YOUR_SECRET_KEY"), bucket="my-bucket", bucket_path="data")

Main functionalities:
- Checking if CUDA is available on the machine
- Normalizing text by performing various cleaning operations
- Uploading files to Amazon S3

Methods:
- __init__(): Initializes the Utils class and sets up the spaCy English language model with a sentence tokenizer.
- check_cuda(): Checks if CUDA is available on the machine and provides additional information if it is.
- normalize_text(_): Cleans a text string by removing whitespace, replacing characters, and removing curly braces.
- upload_to_s3(file_or_dir, keys, bucket, bucket_path): Uploads a file or directory to an Amazon S3 bucket using the provided access keys and bucket information.

Fields: The Utils class does not have any fields.

check_cuda()[source]

The function checks if CUDA is available on the machine and provides additional information if it is.

normalize_text(_)[source]

The normalize_text function takes a string as input and performs various cleaning operations on it, such as removing whitespace, replacing characters, and removing curly braces.

Parameters:

_ – The parameter “_” is a placeholder for the input string that needs to be cleaned

Returns:

a cleaned version of the input string.

upload_to_s3(file_or_dir: str = None, keys: tuple = ('YOUR_ACCESS_KEY', 'YOUR_SECRET_KEY'), bucket: str = None, bucket_path: str = None)[source]

Uploads a file or directory to an Amazon S3 bucket.

Args:

file_or_dir (str): The path of the file or directory to be uploaded.
keys (tuple): A tuple containing the access keys (access key ID and secret access key) required to access the Amazon S3 bucket. Default is ("YOUR_ACCESS_KEY", "YOUR_SECRET_KEY").
bucket (str): The name of the Amazon S3 bucket where the file or directory will be uploaded.
bucket_path (str): The path within the bucket where the file or directory will be uploaded.

Returns:

None

Raises:

None

Example Usage:

# Create an instance of the Utils class
utils = Utils()

# Upload a file to Amazon S3
utils.upload_to_s3(file_or_dir="path/to/file.txt", keys=("YOUR_ACCESS_KEY", "YOUR_SECRET_KEY"), bucket="my-bucket", bucket_path="data")

magnet.utils.globals.break_into_chunks(text, max_words)[source]

Break a text into chunks of a specified maximum number of words.

Parameters:
- text (str): The text to break into chunks.
- max_words (int): The maximum number of words per chunk.

Returns:
- List[str]: A list of text chunks, each with up to max_words words.

magnet.utils.globals.reversal()[source]

magnet.utils.huggingface module

magnet.utils.local module

magnet.utils.markdown module

magnet.utils.milvus module

magnet.utils.prompts module