Class: LLaMACpp::Context

Inherits:
Object
  • Object
show all
Defined in:
ext/llama_cpp/dummy.rb

Overview

Class for context

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(model:, params:) ⇒ Context

Create context.

Parameters:



842
# File 'ext/llama_cpp/dummy.rb', line 842

def initialize(model:, params:); end

Instance Attribute Details

#model ⇒ Model (readonly)

Returns the model.

Returns:



836
837
838
# File 'ext/llama_cpp/dummy.rb', line 836

def model
  @model
end

Instance Method Details

#decode(batch) ⇒ NilClass

Evaluates the tokens.

Parameters:

  • batch (Batch)

    The batch.

Returns:

  • (NilClass)


854
# File 'ext/llama_cpp/dummy.rb', line 854

def decode(batch); end

#embeddings ⇒ Array<Float>

Returns the embeddings.

Returns:

  • (Array<Float>)

    shape: (n_tokens * n_embd)



864
# File 'ext/llama_cpp/dummy.rb', line 864

def embeddings; end

#embeddings_ith(i) ⇒ Array<Float>

Returns the embeddings for i-th token.

Parameters:

  • i (Integer)

    The token id.

Returns:

  • (Array<Float>)

    shape: (n_embd)



870
# File 'ext/llama_cpp/dummy.rb', line 870

def embeddings_ith(i); end

#embeddings_seq(seq_id) ⇒ Array<Float>

Returns the embeddings for a sequence id.

Parameters:

  • seq_id (Integer)

    The sequence id.

Returns:

  • (Array<Float>)

    shape: (n_embd)



876
# File 'ext/llama_cpp/dummy.rb', line 876

def embeddings_seq(seq_id); end

#encode(batch) ⇒ NilClass

Processes a batch of tokens with the encoder part of the encoder-decoder model.

Parameters:

  • batch (Batch)

    The batch.

Returns:

  • (NilClass)


848
# File 'ext/llama_cpp/dummy.rb', line 848

def encode(batch); end

#grammar_accept_token(grammar:, token:) ⇒ Nil

Accepts the sampled token into the grammar

Parameters:

  • grammar (Grammar)

    The grammar.

  • token (Integer)

    The token.

Returns:

  • (Nil)


1150
# File 'ext/llama_cpp/dummy.rb', line 1150

def grammar_accept_token(grammar:, token:); end

#kv_cache_clear ⇒ NilClass

Clear the KV cache.

Returns:

  • (NilClass)


944
# File 'ext/llama_cpp/dummy.rb', line 944

def kv_cache_clear(); end

#kv_cache_defrag ⇒ NilClass

Defragment the KV cache.

Returns:

  • (NilClass)


992
# File 'ext/llama_cpp/dummy.rb', line 992

def kv_cache_defrag(); end

#kv_cache_seq_add(seq_id, p0, p1, delta) ⇒ NilClass

Adds relative position “delta” to all tokens that belong to the specified sequence and have positions in [p0, p1)

Parameters:

  • seq_id (Integer)

    The sequence id.

  • p0 (Integer)

    The start position.

  • p1 (Integer)

    The end position.

  • delta (Integer)

    The relative position.

Returns:

  • (NilClass)


972
# File 'ext/llama_cpp/dummy.rb', line 972

def kv_cache_seq_add(seq_id, p0, p1, delta); end

#kv_cache_seq_cp(seq_id_src, seq_id_dst, p0, p1) ⇒ NilClass

Copies all tokens that belong to the specified sequence to another sequence.

Parameters:

  • seq_id_src (Integer)

    The source sequence id.

  • seq_id_dst (Integer)

    The destination sequence id.

  • p0 (Integer)

    The start position.

  • p1 (Integer)

    The end position.

Returns:

  • (NilClass)


961
# File 'ext/llama_cpp/dummy.rb', line 961

def kv_cache_seq_cp(seq_id_src, seq_id_dst, p0, p1); end

#kv_cache_seq_div(seq_id, p0, p1, d) ⇒ NilClass

Integer division of the positions by a factor of `d > 1`.

Parameters:

  • seq_id (Integer)

    The sequence id.

  • p0 (Integer)

    The start position.

  • p1 (Integer)

    The end position.

  • d (Integer)

    The factor.

Returns:

  • (NilClass)


981
# File 'ext/llama_cpp/dummy.rb', line 981

def kv_cache_seq_div(seq_id, p0, p1, d); end

#kv_cache_seq_keep(seq_id) ⇒ Object



963
# File 'ext/llama_cpp/dummy.rb', line 963

def kv_cache_seq_keep(seq_id); end

#kv_cache_seq_pos_max(seq_id) ⇒ Integer

Returns the maximum position present in the KV cache for the specified sequence

Parameters:

  • seq_id (Integer)

    The sequence id.

Returns:

  • (Integer)


987
# File 'ext/llama_cpp/dummy.rb', line 987

def kv_cache_seq_pos_max(seq_id); end

#kv_cache_seq_rm(seq_id, p0, p1) ⇒ NilClass

Removes all tokens that belong to the specified sequence and have positions in [p0, p1).

Parameters:

  • seq_id (Integer)

    The sequence id.

  • p0 (Integer)

    The start position.

  • p1 (Integer)

    The end position.

Returns:

  • (NilClass)


952
# File 'ext/llama_cpp/dummy.rb', line 952

def kv_cache_seq_rm(seq_id, p0, p1); end

#kv_cache_token_count ⇒ Integer

Returns the number of tokens in the kv cache.

Returns:

  • (Integer)


939
# File 'ext/llama_cpp/dummy.rb', line 939

def kv_cache_token_count; end

#kv_cache_update ⇒ NilClass

Apply the KV cache updates.

Returns:

  • (NilClass)


997
# File 'ext/llama_cpp/dummy.rb', line 997

def kv_cache_update(); end

#load_session_file(session_path:) ⇒ Array<Integer>

Loads session file.

Parameters:

  • session_path (String)

    The path to the session file.

Returns:

  • (Array<Integer>)


1019
# File 'ext/llama_cpp/dummy.rb', line 1019

def load_session_file(session_path:); end

#logits ⇒ Array<Float>

Returns the logits.

Returns:

  • (Array<Float>)


859
# File 'ext/llama_cpp/dummy.rb', line 859

def logits(); end

#n_batch ⇒ Integer

Returns the number of batch.

Returns:

  • (Integer)


899
# File 'ext/llama_cpp/dummy.rb', line 899

def n_batch; end

#n_ctx ⇒ Integer

Returns the number of text context.

Returns:

  • (Integer)


894
# File 'ext/llama_cpp/dummy.rb', line 894

def n_ctx; end

#n_seq_max ⇒ Integer

Returns the max number of sequences.

Returns:

  • (Integer)


909
# File 'ext/llama_cpp/dummy.rb', line 909

def n_seq_max; end

#n_threads ⇒ Integer

Returns the number of threads.

Returns:

  • (Integer)


914
# File 'ext/llama_cpp/dummy.rb', line 914

def n_threads; end

#n_threads_batch ⇒ Integer

Returns the number of threads for batch processing.

Returns:

  • (Integer)


919
# File 'ext/llama_cpp/dummy.rb', line 919

def n_threads_batch; end

#n_ubatch ⇒ Integer

Returns the physical maximum batch size.

Returns:

  • (Integer)


904
# File 'ext/llama_cpp/dummy.rb', line 904

def n_ubatch; end

#pooling_type ⇒ Integer

Returns the pooling type.

Returns:

  • (Integer)


1155
# File 'ext/llama_cpp/dummy.rb', line 1155

def pooling_type(); end

#print_timings ⇒ NilClass

Prints timings.

Returns:

  • (NilClass)


929
# File 'ext/llama_cpp/dummy.rb', line 929

def print_timings; end

#reset_timings ⇒ NilClass

Resets timings.

Returns:

  • (NilClass)


934
# File 'ext/llama_cpp/dummy.rb', line 934

def reset_timings; end

#sample_apply_guidance(logits:, logits_guidance:, scale:) ⇒ Object

Apply classifier-free guidance to the logits.

Parameters:

  • logits (Array<Float>)

    The logits extracted from the original generation context.

  • logits_guidance (Array<Float>)

    The logits extracted from the separate context from the same model.

  • scale (Float)

    The guidance strength.



1043
# File 'ext/llama_cpp/dummy.rb', line 1043

def sample_apply_guidance(logits:, logits_guidance:, scale:); end

#sample_entropy(candidates, min_temp:, max_temp:, exponent_val:) ⇒ Nil

Samples dynamic temperature.

Parameters:

  • candidates (TokenDataArray)

    The array of token data.

  • min_temp (Float)

    The minimum temperature.

  • max_temp (Float)

    The maximum temperature.

  • exponent_val (Float)

    The exponent value.

Returns:

  • (Nil)


1098
# File 'ext/llama_cpp/dummy.rb', line 1098

def sample_entropy(candidates, min_temp:, max_temp:, exponent_val:); end

#sample_grammar(candidates, grammar:) ⇒ Nil

Applies constraints from grammar

Parameters:

Returns:

  • (Nil)


1143
# File 'ext/llama_cpp/dummy.rb', line 1143

def sample_grammar(candidates, grammar:); end

#sample_min_p(candidates, prob:, min_keep: 1) ⇒ Nil

Minimum p sampling.

Parameters:

  • candidates (TokenDataArray)

    The array of token data.

  • prob (Float)

    The probability.

  • min_keep (Integer) (defaults to: 1)

    The minimum number of tokens to keep.

Returns:

  • (Nil)


1073
# File 'ext/llama_cpp/dummy.rb', line 1073

def sample_min_p(candidates, prob:, min_keep: 1); end

#sample_repetition_penalties(candidates, last_n_tokens, penalty_repeat:, penalty_freq:, penalty_present:) ⇒ Nil

Sampling with repetition penalty.

Parameters:

  • candidates (TokenDataArray)

    The array of token data.

  • last_n_tokens (Array<Integer>)

    The last n tokens.

  • penalty_repeat (Float)

    The penalty for repetition.

  • penalty_freq (Float)

    The penalty for frequency.

  • penalty_present (Float)

    The penalty for presence.

Returns:

  • (Nil)


1036
# File 'ext/llama_cpp/dummy.rb', line 1036

def sample_repetition_penalties(candidates, last_n_tokens, penalty_repeat:, penalty_freq:, penalty_present:); end

#sample_softmax(candidates) ⇒ Nil

Sorts candidates by their probabilities with logits.

Parameters:

Returns:

  • (Nil)


1049
# File 'ext/llama_cpp/dummy.rb', line 1049

def sample_softmax(candidates); end

#sample_tail_free(candidates, z:, min_keep: 1) ⇒ Nil

Tail free sampling.

Parameters:

  • candidates (TokenDataArray)

    The array of token data.

  • z (Float)

    The tail free sampling parameter.

  • min_keep (Integer) (defaults to: 1)

    The minimum number of tokens to keep.

Returns:

  • (Nil)


1081
# File 'ext/llama_cpp/dummy.rb', line 1081

def sample_tail_free(candidates, z:, min_keep: 1); end

#sample_temp(candidates, temp:) ⇒ Nil

Samples temperature.

Parameters:

  • candidates (TokenDataArray)

    The array of token data.

  • temp (Float)

    The temperature.

Returns:

  • (Nil)


1105
# File 'ext/llama_cpp/dummy.rb', line 1105

def sample_temp(candidates, temp:); end

#sample_token(candidates) ⇒ Integer

Returns the randomly selected token from the candidates based on their probabilities.

Parameters:

Returns:

  • (Integer)


1136
# File 'ext/llama_cpp/dummy.rb', line 1136

def sample_token(candidates); end

#sample_token_greedy(candidates) ⇒ Integer

Returns the selected token with the highest probability.

Parameters:

Returns:

  • (Integer)


1130
# File 'ext/llama_cpp/dummy.rb', line 1130

def sample_token_greedy(candidates); end

#sample_token_mirostat(candidates, tau:, eta:, m:, mu:) ⇒ Array<Integer, Float>

Returns the token with Mirostat 1.0 algorithm.

Parameters:

  • candidates (TokenDataArray)

    The array of token data.

  • tau (Float)

    The target cross-entropy value.

  • eta (Float)

    The learning rate.

  • m (Float)

    The number of tokens considered in the estimation of `s_hat`.

  • mu (Float)

    The maximum cross-entropy.

Returns:

  • (Array<Integer, Float>)

    The array of token id and updated mu.



1115
# File 'ext/llama_cpp/dummy.rb', line 1115

def sample_token_mirostat(candidates, tau:, eta:, m:, mu:); end

#sample_token_mirostat_v2(candidates, tau:, eta:, mu:) ⇒ Array<Integer, Float>

Returns the token with Mirostat 2.0 algorithm.

Parameters:

  • candidates (TokenDataArray)

    The array of token data.

  • tau (Float)

    The target cross-entropy value.

  • eta (Float)

    The learning rate.

  • mu (Float)

    The maximum cross-entropy.

Returns:

  • (Array<Integer, Float>)

    The array of token id and updated mu.



1124
# File 'ext/llama_cpp/dummy.rb', line 1124

def sample_token_mirostat_v2(candidates, tau:, eta:, mu:); end

#sample_top_k(candidates, k:, min_keep: 1) ⇒ Nil

Top-K sampling.

Parameters:

  • candidates (TokenDataArray)

    The array of token data.

  • k (Integer)

    The top-k.

  • min_keep (Integer) (defaults to: 1)

    The minimum number of tokens to keep.

Returns:

  • (Nil)


1057
# File 'ext/llama_cpp/dummy.rb', line 1057

def sample_top_k(candidates, k:, min_keep: 1); end

#sample_top_p(candidates, prob:, min_keep: 1) ⇒ Nil

Nucleus sampling.

Parameters:

  • candidates (TokenDataArray)

    The array of token data.

  • prob (Float)

    The probability.

  • min_keep (Integer) (defaults to: 1)

    The minimum number of tokens to keep.

Returns:

  • (Nil)


1065
# File 'ext/llama_cpp/dummy.rb', line 1065

def sample_top_p(candidates, prob:, min_keep: 1); end

#sample_typical(candidates, prob:, min_keep: 1) ⇒ Nil

Typical sampling.

Parameters:

  • candidates (TokenDataArray)

    The array of token data.

  • prob (Float)

    The probability.

  • min_keep (Integer) (defaults to: 1)

    The minimum number of tokens to keep.

Returns:

  • (Nil)


1089
# File 'ext/llama_cpp/dummy.rb', line 1089

def sample_typical(candidates, prob:, min_keep: 1); end

#save_session_file(session_path:, session_tokens:) ⇒ Nil

Saves session file.

Parameters:

  • session_path (String)

    The path to the session file.

  • session_tokens (Array<Integer>)

    The session tokens.

Returns:

  • (Nil)


1026
# File 'ext/llama_cpp/dummy.rb', line 1026

def save_session_file(session_path:, session_tokens:); end

#set_causal_attn(causal_attn) ⇒ NilClass

Sets whether to use causal attention.

Parameters:

  • causal_attn (Boolean)

    The flag to use causal attention.

Returns:

  • (NilClass)


1008
# File 'ext/llama_cpp/dummy.rb', line 1008

def set_causal_attn(causal_attn); end

#set_embeddings(embd) ⇒ NilClass

Sets whether the model is in embeddings mode or not.

Parameters:

  • embd (Boolean)

    The flag to return embeddings.

Returns:

  • (NilClass)


882
# File 'ext/llama_cpp/dummy.rb', line 882

def set_embeddings(embd); end

#set_n_threads(n_threads:, n_threads_batch:) ⇒ NilClass

Sets the number of threads used for decoding.

Parameters:

  • n_threads (Integer)

    The number of threads.

  • n_threads_batch (Integer)

    The number of threads for batch processing.

Returns:

  • (NilClass)


889
# File 'ext/llama_cpp/dummy.rb', line 889

def set_n_threads(n_threads:, n_threads_batch:); end

#set_rng_seed(seed) ⇒ Object

Sets the current rng seed.

Parameters:

  • seed (Integer)

    The rng seed.



1002
# File 'ext/llama_cpp/dummy.rb', line 1002

def set_rng_seed(seed); end

#synchronize ⇒ NilClass

Wait until all computations are finished.

Returns:

  • (NilClass)


1013
# File 'ext/llama_cpp/dummy.rb', line 1013

def synchronize(); end

#timings ⇒ Timings

Returns the timing information

Returns:



924
# File 'ext/llama_cpp/dummy.rb', line 924

def timings; end