Class: LLaMACpp::ContextParams
- Inherits:
-
Object
- Object
- LLaMACpp::ContextParams
- Defined in:
- ext/llama_cpp/dummy.rb
Overview
Class for parameters of context.
Instance Method Summary
-
#attention_type ⇒ Integer
Returns the attention type.
-
#attention_type=(attention_type) ⇒ Object
Sets the attention type.
-
#defrag_thold ⇒ Float
Returns the threshold for defragmenting the KV cache.
-
#defrag_thold=(defrag_thold) ⇒ Object
Sets the threshold for defragmenting the KV cache.
-
#embeddings ⇒ Boolean
Returns the flag for embeddings mode only.
-
#embeddings=(flag) ⇒ Object
Sets the flag for embeddings mode only.
-
#flash_attn ⇒ Boolean
Returns the flag whether to use flash attention.
-
#flash_attn=(flag) ⇒ Object
Sets the flag whether to use flash attention.
-
#logits_all ⇒ Boolean
Returns the flag to compute all logits.
-
#logits_all=(flag) ⇒ Object
Sets the flag to compute all logits.
-
#n_batch ⇒ Integer
Returns the logical maximum batch size.
-
#n_batch=(n_batch) ⇒ Object
Sets the logical maximum batch size.
-
#n_ctx ⇒ Integer
Returns the size of the text context.
-
#n_ctx=(n_ctx) ⇒ Object
Sets the size of the text context.
-
#n_seq_max ⇒ Integer
Returns the max number of sequences.
-
#n_seq_max=(n_seq_max) ⇒ Object
Sets the max number of sequences.
-
#n_ubatch ⇒ Integer
Returns the physical maximum batch size.
-
#n_ubatch=(n_ubatch) ⇒ Object
Sets the physical maximum batch size.
-
#offload_kqv=(flag) ⇒ Object
Sets the flag whether to offload the KQV ops.
-
#offload_kqv ⇒ Boolean
Returns the flag whether to offload the KQV ops.
-
#pooling_type ⇒ Integer
Returns the pooling type.
-
#pooling_type=(pooling_type) ⇒ Object
Sets the pooling type.
-
#rope_freq_base ⇒ Float
Returns the RoPE base frequency.
-
#rope_freq_base=(rope_freq_base) ⇒ Object
Sets the RoPE base frequency.
-
#rope_freq_scale ⇒ Float
Returns the RoPE frequency scaling factor.
-
#rope_freq_scale=(rope_freq_scale) ⇒ Object
Sets the RoPE frequency scaling factor.
-
#rope_scaling_type ⇒ Integer
Returns the RoPE scaling type.
-
#rope_scaling_type=(scaling_type) ⇒ Object
Sets the RoPE scaling type.
-
#seed ⇒ Integer
Returns the random seed.
-
#seed=(seed) ⇒ Object
Sets the random seed.
-
#type_k ⇒ Integer
Returns the data type for K cache.
-
#type_k=(type_k) ⇒ Object
Sets the data type for K cache.
-
#type_v ⇒ Integer
Returns the data type for V cache.
-
#type_v=(type_v) ⇒ Object
Sets the data type for V cache.
-
#yarn_attn_factor ⇒ Float
Returns the YaRN magnitude scaling factor.
-
#yarn_attn_factor=(yarn_attn_factor) ⇒ Object
Sets the YaRN magnitude scaling factor.
-
#yarn_beta_fast ⇒ Float
Returns the YaRN low correction dim.
-
#yarn_beta_fast=(yarn_beta_fast) ⇒ Object
Sets the YaRN low correction dim.
-
#yarn_beta_slow ⇒ Float
Returns the YaRN high correction dim.
-
#yarn_beta_slow=(yarn_beta_slow) ⇒ Object
Sets the YaRN high correction dim.
-
#yarn_ext_factor ⇒ Float
Returns the YaRN extrapolation mix factor.
-
#yarn_ext_factor=(yarn_ext_factor) ⇒ Object
Sets the YaRN extrapolation mix factor.
-
#yarn_orig_ctx ⇒ Integer
Returns the YaRN original context size.
-
#yarn_orig_ctx=(yarn_orig_ctx) ⇒ Object
Sets the YaRN original context size.
Instance Method Details
#attention_type ⇒ Integer
Returns the attention type.
1267 |
# File 'ext/llama_cpp/dummy.rb', line 1267 def attention_type; end |
#attention_type=(attention_type) ⇒ Object
Sets the attention type.
1263 |
# File 'ext/llama_cpp/dummy.rb', line 1263 def attention_type=(attention_type); end |
#defrag_thold ⇒ Float
Returns the threshold for defragmenting the KV cache.
1331 |
# File 'ext/llama_cpp/dummy.rb', line 1331 def defrag_thold; end |
#defrag_thold=(defrag_thold) ⇒ Object
Sets the threshold for defragmenting the KV cache.
1327 |
# File 'ext/llama_cpp/dummy.rb', line 1327 def defrag_thold=(defrag_thold); end |
#embeddings ⇒ Boolean
Returns the flag for embeddings mode only.
1363 |
# File 'ext/llama_cpp/dummy.rb', line 1363 def embeddings; end |
#embeddings=(flag) ⇒ Object
Sets the flag for embeddings mode only.
1359 |
# File 'ext/llama_cpp/dummy.rb', line 1359 def embeddings=(flag); end |
#flash_attn ⇒ Boolean
Returns the flag whether to use flash attention.
1379 |
# File 'ext/llama_cpp/dummy.rb', line 1379 def flash_attn; end |
#flash_attn=(flag) ⇒ Object
Sets the flag whether to use flash attention.
1375 |
# File 'ext/llama_cpp/dummy.rb', line 1375 def flash_attn=(flag); end |
#logits_all ⇒ Boolean
Returns the flag to compute all logits.
1355 |
# File 'ext/llama_cpp/dummy.rb', line 1355 def logits_all; end |
#logits_all=(flag) ⇒ Object
Sets the flag to compute all logits.
1351 |
# File 'ext/llama_cpp/dummy.rb', line 1351 def logits_all=(flag); end |
#n_batch ⇒ Integer
Returns the logical maximum batch size.
1223 |
# File 'ext/llama_cpp/dummy.rb', line 1223 def n_batch; end |
#n_batch=(n_batch) ⇒ Object
Sets the logical maximum batch size.
1218 |
# File 'ext/llama_cpp/dummy.rb', line 1218 def n_batch=(n_batch); end |
#n_ctx ⇒ Integer
Returns the size of the text context.
1213 |
# File 'ext/llama_cpp/dummy.rb', line 1213 def n_ctx; end |
#n_ctx=(n_ctx) ⇒ Object
Sets the size of the text context.
1209 |
# File 'ext/llama_cpp/dummy.rb', line 1209 def n_ctx=(n_ctx); end |
#n_seq_max ⇒ Integer
Returns the max number of sequences.
1243 |
# File 'ext/llama_cpp/dummy.rb', line 1243 def n_seq_max; end |
#n_seq_max=(n_seq_max) ⇒ Object
Sets the max number of sequences.
1238 |
# File 'ext/llama_cpp/dummy.rb', line 1238 def n_seq_max=(n_seq_max); end |
#n_ubatch ⇒ Integer
Returns the physical maximum batch size.
1233 |
# File 'ext/llama_cpp/dummy.rb', line 1233 def n_ubatch; end |
#n_ubatch=(n_ubatch) ⇒ Object
Sets the physical maximum batch size.
1228 |
# File 'ext/llama_cpp/dummy.rb', line 1228 def n_ubatch=(n_ubatch); end |
#offload_kqv=(flag) ⇒ Object
Sets the flag whether to offload the KQV ops.
1367 |
# File 'ext/llama_cpp/dummy.rb', line 1367 def offload_kqv=(flag); end |
#offload_kqv ⇒ Boolean
Returns the flag whether to offload the KQV ops.
1371 |
# File 'ext/llama_cpp/dummy.rb', line 1371 def offload_kqv; end |
#pooling_type ⇒ Integer
Returns the pooling type.
1259 |
# File 'ext/llama_cpp/dummy.rb', line 1259 def pooling_type; end |
#pooling_type=(pooling_type) ⇒ Object
Sets the pooling type.
1255 |
# File 'ext/llama_cpp/dummy.rb', line 1255 def pooling_type=(pooling_type); end |
#rope_freq_base ⇒ Float
Returns the RoPE base frequency.
1275 |
# File 'ext/llama_cpp/dummy.rb', line 1275 def rope_freq_base; end |
#rope_freq_base=(rope_freq_base) ⇒ Object
Sets the RoPE base frequency.
1271 |
# File 'ext/llama_cpp/dummy.rb', line 1271 def rope_freq_base=(rope_freq_base); end |
#rope_freq_scale ⇒ Float
Returns the RoPE frequency scaling factor.
1283 |
# File 'ext/llama_cpp/dummy.rb', line 1283 def rope_freq_scale; end |
#rope_freq_scale=(rope_freq_scale) ⇒ Object
Sets the RoPE frequency scaling factor.
1279 |
# File 'ext/llama_cpp/dummy.rb', line 1279 def rope_freq_scale=(rope_freq_scale); end |
#rope_scaling_type ⇒ Integer
Returns the RoPE scaling type.
1251 |
# File 'ext/llama_cpp/dummy.rb', line 1251 def rope_scaling_type; end |
#rope_scaling_type=(scaling_type) ⇒ Object
Sets the RoPE scaling type.
1247 |
# File 'ext/llama_cpp/dummy.rb', line 1247 def rope_scaling_type=(scaling_type); end |
#seed ⇒ Integer
Returns the random seed.
1205 |
# File 'ext/llama_cpp/dummy.rb', line 1205 def seed; end |
#seed=(seed) ⇒ Object
Sets the random seed.
1201 |
# File 'ext/llama_cpp/dummy.rb', line 1201 def seed=(seed); end |
#type_k ⇒ Integer
Returns the data type for K cache.
1339 |
# File 'ext/llama_cpp/dummy.rb', line 1339 def type_k; end |
#type_k=(type_k) ⇒ Object
Sets the data type for K cache.
1335 |
# File 'ext/llama_cpp/dummy.rb', line 1335 def type_k=(type_k); end |
#type_v ⇒ Integer
Returns the data type for V cache.
1347 |
# File 'ext/llama_cpp/dummy.rb', line 1347 def type_v; end |
#type_v=(type_v) ⇒ Object
Sets the data type for V cache.
1343 |
# File 'ext/llama_cpp/dummy.rb', line 1343 def type_v=(type_v); end |
#yarn_attn_factor ⇒ Float
Returns the YaRN magnitude scaling factor.
1299 |
# File 'ext/llama_cpp/dummy.rb', line 1299 def yarn_attn_factor; end |
#yarn_attn_factor=(yarn_attn_factor) ⇒ Object
Sets the YaRN magnitude scaling factor.
1295 |
# File 'ext/llama_cpp/dummy.rb', line 1295 def yarn_attn_factor=(yarn_attn_factor); end |
#yarn_beta_fast ⇒ Float
Returns the YaRN low correction dim.
1307 |
# File 'ext/llama_cpp/dummy.rb', line 1307 def yarn_beta_fast; end |
#yarn_beta_fast=(yarn_beta_fast) ⇒ Object
Sets the YaRN low correction dim.
1303 |
# File 'ext/llama_cpp/dummy.rb', line 1303 def yarn_beta_fast=(yarn_beta_fast); end |
#yarn_beta_slow ⇒ Float
Returns the YaRN high correction dim.
1315 |
# File 'ext/llama_cpp/dummy.rb', line 1315 def yarn_beta_slow; end |
#yarn_beta_slow=(yarn_beta_slow) ⇒ Object
Sets the YaRN high correction dim.
1311 |
# File 'ext/llama_cpp/dummy.rb', line 1311 def yarn_beta_slow=(yarn_beta_slow); end |
#yarn_ext_factor ⇒ Float
Returns the YaRN extrapolation mix factor.
1291 |
# File 'ext/llama_cpp/dummy.rb', line 1291 def yarn_ext_factor; end |
#yarn_ext_factor=(yarn_ext_factor) ⇒ Object
Sets the YaRN extrapolation mix factor.
1287 |
# File 'ext/llama_cpp/dummy.rb', line 1287 def yarn_ext_factor=(yarn_ext_factor); end |
#yarn_orig_ctx ⇒ Integer
Returns the YaRN original context size.
1323 |
# File 'ext/llama_cpp/dummy.rb', line 1323 def yarn_orig_ctx; end |
#yarn_orig_ctx=(yarn_orig_ctx) ⇒ Object
Sets the YaRN original context size.
1319 |
# File 'ext/llama_cpp/dummy.rb', line 1319 def yarn_orig_ctx=(yarn_orig_ctx); end |