Commit Graph
Select branches
Hide Pull Requests
add-chat-response-format
add-google-cloud-provider
add-quickstart-script
add-rotary-embed-tests
add-small-ttft-script
add-test-for-warmup-and-kvcache
add_api_key
add_batch_dimension
add_gptq_docs
add_integration_test
add_readme_dashboard
add_tunable_prefill
adding_docs
adjust-where-request-max-tokens-is-defaulted
amd-ci-fx
auto_length
auto_max_prefill
automodel-supports-flash-paged-attention
avoid-cuda-graph-during-warmup-if-oom
avoid-zero-seed
backends/trtllm
backends/trtllm-executor
bnb4
bugfix/add_tools_prompt
bugfix/moe-kernels-imports
bugfix/phi-exl2
bump-client-0.6.2
bump-poetry-and-requirements
ci-amihalik-update-chat-completion-messages
ci-new-cluster
ci-patch
ci-run-openai-function-calling-compatible-support
ci-xpu
ci-xpu2
ci2
ci_amd
ci_amd2
ci_amd3
ci_amd4
close_dl_thread
compat_logger
cuda_ipc_allreduce
debug-gpt2
debug-request-id
debug-torch-23
debug/gemma2
deploy/aml
dev
development-guide
docs/readme-nix
dummy
enable-non-grammar-constrained-tools
enable-qwen2vl-video
enable_non_divisible_embeddings
exl2
experiment/moe
explore-static-triton-kernels
explore-t4-gemma-issues
feat-backend-llamacpp
feat/add-load-test
feat/attention_sinks
feat/backend_abstraction
feat/backend_feature
feat/better_tokens
feat/ci-benchmarks
feat/cuda_12
feat/flash_decoding
feat/improve_max_tokens
feat/limit
feat/max_queue_size
feat/page_re_alloc
feat/parse_logs
feat/support_deepspeed
feature/compressed-tensors-kv-cache-fp8
feature/machete
feature/moe-kernels
feature/no_repeat_ngram_size
feature/no_repeat_ngram_size_ci
feature/phi-3-small
feature/prefix
feature/radix-prefix-cache
feature/radix-prefix-cache-bench
feature/vlm-prefix-caching
fix-cudagraph-bug
fix-gemma-tokenization
fix-grammar-cleanup-bug
fix-grammar-fsm-batching
fix-main-trtllm
fix-mixtral-adapter-loading
fix-release-tests
fix-repack-for-marlin
fix-version-install
fix/allow-top-p-0
fix/avoid_record_streams
fix/op-trace-id
fix/parse-mamba-config
fix_default_arg
fix_exl2
fix_leak
fix_mistral2
fix_neox_rotary_emb
fix_phi3
flashinfer
fp8_kvcache
fp8_rocm
git_2.0.4
git_v2.1.0
git_v2.1.1
git_v2.2.0
git_v2.3.0
git_v2.3.1
git_v2.4.0
improve-docs
improve-dynamic-message-content
improve-vlm-support
improve_defaults
improve_launcher_defaults
inlcude-latest-release-on-commit-builds-tags
ipex-moe
llama-fused-compiled-mlp
main
maintenance/docker-network
maintenance/merge-vlm-input-prep
mamba2
martinigoyanes-fix-frequency-penalty
medusa
megatron
mi300-temp
mllama
model_compat_log
multi-lora
nix/cargo-clippy
nix/docker2
nix_integration_tests
nix_test2
op-compilation-benchmarking
osanseviero-patch-1
pip-installable
pr-1869-ci-run
pr-2076-ci-run
pr-2290-ci-runner
pr-2366-ci-branch
pr-2444-ci-branch
pr-2517-ci-branch
pr-2711-ci-branch
precompile-kernels-workflow
prefix_chunk
prefix_default
quantization
refactor-lora-linear
remove_post_load_weights
revert
rocm-ci-build
rocm_6.2_fixes
router-grammar-compile
self-generating-docs
set-num-blocks
simpler_exllama
skip-mistral-test
speculative
streaming_conceptual
support-continue-final-message
support-phi-model
support-phi3-small
support-pre-compile-kernels
temp_work
test-batch-speedup-amount
test_docs
test_rocm
tmp_medusa
tmp_torch_compile
trtllm-stop-words
trtllm/cancellation
update_docs2
update_internal_version
update_peft
update_readme
upgrade-outlines
upgrade_mlp_speculator
use_g6
#1
#100
#101
#1010
#1018
#1019
#102
#1022
#1023
#1024
#103
#1033
#1034
#1042
#1044
#1045
#1048
#1049
#1052
#1054
#1058
#1059
#106
#1060
#1061
#1063
#1064
#1065
#1066
#1068
#107
#1070
#1071
#1075
#1076
#1077
#108
#1080
#1081
#1089
#109
#1090
#1091
#1092
#1094
#1096
#1097
#1099
#11
#110
#1100
#1101
#1102
#1103
#1105
#1110
#1112
#1116
#1123
#1128
#1134
#114
#1140
#1141
#115
#1153
#1155
#116
#1165
#1165
#117
#1173
#1176
#1178
#1179
#118
#1182
#1183
#1184
#1187
#119
#1198
#1202
#1211
#1214
#1219
#122
#1224
#1228
#123
#1239
#1241
#1242
#1243
#1246
#1252
#126
#1260
#1267
#1270
#1272
#1274
#1276
#1279
#128
#1285
#1287
#129
#1294
#1295
#13
#130
#1301
#1305
#1307
#1308
#1313
#132
#1326
#1328
#133
#1336
#1337
#134
#1341
#1343
#1346
#1347
#1348
#135
#1351
#1352
#1353
#1358
#136
#1361
#1364
#137
#1370
#1373
#138
#1381
#1386
#139
#1390
#1395
#14
#140
#1408
#141
#1414
#1419
#142
#1420
#1424
#1425
#1427
#1428
#143
#1436
#144
#1442
#1448
#145
#1450
#1453
#1454
#1455
#1459
#1461
#1462
#1463
#1469
#147
#1470
#1471
#1473
#1475
#1476
#1477
#1478
#148
#1480
#1484
#1486
#1488
#1489
#149
#1490
#1491
#1492
#1494
#1495
#1496
#1497
#1498
#15
#150
#1502
#1504
#1505
#1506
#151
#1511
#1512
#1514
#1515
#1516
#1517
#1518
#152
#1520
#1523
#1524
#1526
#1527
#153
#1532
#1533
#1537
#1539
#154
#1540
#1541
#1542
#1543
#155
#1550
#1552
#1555
#1556
#1557
#1560
#1563
#1564
#1567
#1568
#1569
#1570
#1571
#1576
#1577
#1578
#1579
#1580
#1583
#1584
#1585
#1586
#1587
#1588
#159
#1591
#1592
#1594
#16
#160
#1603
#1605
#1606
#1607
#1608
#1609
#161
#1610
#1614
#1617
#1618
#1619
#162
#1621
#1626
#1628
#163
#1632
#1637
#1638
#1639
#164
#1646
#1648
#1650
#1651
#1653
#1658
#1660
#1662
#1663
#1664
#1666
#1667
#1668
#167
#1676
#168
#1682
#1685
#1686
#1693
#1697
#1698
#17
#170
#1702
#1703
#1704
#1707
#1708
#1709
#1710
#1713
#1714
#1715
#1716
#1718
#1719
#1726
#1727
#1729
#173
#1730
#1731
#1734
#1735
#1736
#1737
#1739
#174
#1740
#1747
#1748
#1749
#175
#1751
#1755
#1756
#1758
#1759
#1760
#1764
#1765
#1767
#1768
#1773
#1774
#1775
#178
#1784
#1789
#179
#1790
#1791
#1797
#1798
#18
#180
#1800
#1801
#1808
#181
#1811
#1812
#1813
#1815
#1816
#1817
#1818
#1820
#1825
#1827
#1828
#1829
#183
#1830
#1832
#1833
#1835
#1836
#1839
#184
#1840
#1841
#1843
#1844
#1845
#1848
#1849
#185
#1850
#1851
#1854
#1855
#186
#1860
#1865
#1866
#1869
#187
#1882
#1884
#1886
#1888
#1889
#1890
#1892
#1894
#1895
#1898
#19
#190
#1902
#1906
#1908
#1909
#191
#1910
#1912
#1915
#1916
#1917
#1918
#1919
#1920
#1921
#1923
#1924
#1925
#1929
#193
#1931
#1932
#1934
#1935
#1936
#1937
#1938
#1939
#194
#1940
#1942
#1947
#1948
#1949
#1950
#1951
#1953
#1954
#1958
#1959
#196
#1963
#1965
#1967
#1970
#1971
#1975
#1980
#1981
#1985
#1986
#1988
#1989
#1990
#1994
#1995
#1996
#2
#20
#200
#2002
#2003
#2004
#2005
#2006
#2008
#201
#2010
#2011
#2013
#2014
#2015
#2016
#2017
#202
#2020
#2021
#2022
#2023
#2024
#2028
#203
#2031
#2032
#2033
#2034
#2038
#2039
#2044
#2045
#2046
#2047
#2049
#205
#2050
#2052
#2054
#2059
#2060
#2061
#2062
#2063
#2065
#2066
#2068
#207
#2071
#2072
#2074
#2075
#2076
#2078
#2079
#208
#2080
#2084
#2085
#2086
#2087
#2088
#2089
#2091
#2092
#2094
#2095
#2097
#2099
#210
#2101
#2102
#2103
#2104
#2105
#2109
#2110
#2111
#2114
#2115
#2116
#2117
#2118
#2119
#212
#2120
#2123
#2124
#2125
#2127
#2128
#2129
#213
#2131
#2132
#2133
#2134
#2135
#2137
#2138
#214
#2140
#2141
#2142
#2148
#2149
#215
#2151
#2152
#2153
#2155
#2156
#2158
#216
#2161
#2163
#2164
#2165
#2166
#2167
#2168
#2169
#217
#2170
#2173
#2175
#2176
#2178
#2179
#218
#2180
#2181
#2182
#2186
#2187
#2189
#219
#2190
#2191
#2193
#2194
#2196
#22
#220
#2201
#2202
#2203
#2204
#2205
#2208
#2209
#221
#2212
#2213
#2215
#2216
#2217
#222
#2220
#2221
#2224
#2225
#2226
#2228
#2230
#2231
#2232
#2233
#2237
#2242
#2243
#2244
#2245
#2248
#2249
#2250
#2251
#2254
#2255
#2256
#2257
#2258
#2259
#226
#2260
#2261
#2262
#2266
#2268
#2269
#227
#2271
#2272
#2273
#2274
#2276
#2277
#2278
#2279
#228
#2281
#2282
#2283
#2284
#2285
#2286
#2287
#2288
#2289
#2290
#2291
#2292
#2298
#2299
#23
#2300
#2303
#2304
#2306
#2307
#2308
#2309
#2311
#2311
#2313
#2314
#2315
#2317
#2320
#2323
#2325
#2326
#2327
#2328
#2329
#233
#2330
#2331
#2333
#2335
#2336
#2337
#2338
#2339
#234
#2341
#2342
#2343
#2344
#2345
#2346
#2347
#235
#2350
#2351
#2352
#2353
#2354
#2357
#2358
#2359
#2360
#2361
#2364
#2365
#2366
#2367
#2368
#237
#2370
#2371
#2372
#2374
#2377
#2378
#2379
#2381
#2382
#2384
#2385
#2386
#2387
#2389
#2390
#2391
#2392
#2394
#2395
#2396
#2397
#2398
#2399
#24
#2401
#2402
#2403
#2404
#2405
#2406
#2407
#2408
#2410
#2411
#2412
#2414
#2415
#2416
#2419
#242
#2420
#2422
#2423
#2424
#2426
#2427
#2428
#2429
#2430
#2431
#2433
#2437
#2437
#2438
#2439
#244
#2442
#2443
#2444
#2449
#2450
#2451
#2453
#2454
#2455
#2459
#246
#2462
#2463
#2468
#2469
#2470
#2471
#2472
#2473
#2477
#2478
#2479
#248
#2481
#2482
#2484
#2486
#2489
#2490
#2491
#2492
#2493
#2494
#2496
#2497
#2498
#2499
#25
#250
#2500
#2501
#2507
#251
#2510
#2511
#2512
#2513
#2514
#2515
#2516
#2517
#2518
#2519
#252
#2520
#2521
#2524
#2525
#2527
#2528
#2529
#2532
#2533
#2535
#2536
#2537
#2538
#2539
#2540
#2545
#2546
#2547
#2548
#255
#2550
#2551
#2552
#2553
#2554
#2555
#2556
#2557
#2558
#2561
#2562
#2563
#2566
#2567
#2568
#257
#2574
#2575
#2577
#2578
#2579
#258
#2580
#2582
#2585
#2586
#2587
#2588
#2589
#259
#2590
#2591
#2591
#2592
#2594
#2595
#2596
#2597
#2599
#26
#2600
#2601
#2602
#2603
#2604
#2605
#2606
#2607
#2609
#261
#2610
#2611
#2612
#2612
#2614
#2616
#2617
#2619
#262
#2620
#2622
#2623
#2625
#2627
#2628
#2629
#2630
#2631
#2632
#2633
#2634
#2634
#2637
#264
#2640
#2642
#2642
#2645
#2646
#2647
#2648
#2648
#2650
#2651
#2652
#2655
#2658
#2659
#266
#2661
#2663
#2664
#2665
#2666
#2668
#267
#2673
#2674
#2677
#2678
#2680
#2682
#2683
#2684
#2685
#2686
#2687
#2688
#2689
#269
#2690
#2691
#2692
#2693
#2694
#2695
#2697
#2698
#2699
#27
#2701
#2702
#2704
#2706
#2707
#2708
#2709
#2710
#2711
#2712
#2713
#2714
#2716
#2717
#2718
#2719
#272
#2720
#2721
#2723
#2723
#2724
#2725
#2726
#2726
#2732
#2733
#2733
#2738
#274
#2740
#2741
#2742
#2743
#2745
#2746
#275
#2750
#2751
#2753
#2754
#2754
#2755
#2756
#2756
#2758
#276
#2760
#2761
#2761
#2762
#2762
#2764
#2765
#2766
#2767
#2767
#2768
#277
#278
#28
#282
#284
#285
#286
#287
#29
#292
#294
#297
#298
#299
#30
#302
#303
#304
#305
#308
#31
#310
#313
#317
#318
#32
#325
#327
#328
#329
#33
#334
#335
#336
#34
#340
#341
#343
#344
#348
#35
#351
#352
#353
#356
#357
#358
#359
#36
#360
#362
#363
#364
#367
#368
#37
#370
#373
#379
#384
#385
#388
#39
#393
#394
#395
#396
#4
#40
#400
#404
#406
#407
#41
#411
#412
#42
#434
#438
#44
#441
#443
#45
#453
#46
#462
#465
#47
#470
#472
#475
#477
#48
#480
#483
#485
#488
#49
#498
#5
#50
#501
#502
#51
#513
#514
#516
#519
#52
#520
#521
#522
#525
#529
#53
#534
#54
#543
#544
#545
#55
#550
#553
#557
#558
#56
#561
#562
#567
#57
#575
#578
#579
#58
#580
#581
#582
#583
#585
#586
#587
#588
#59
#590
#595
#596
#6
#60
#600
#605
#608
#609
#61
#611
#616
#617
#618
#619
#62
#621
#623
#624
#626
#63
#630
#633
#634
#635
#639
#64
#642
#643
#647
#648
#659
#66
#661
#664
#665
#666
#67
#670
#671
#678
#68
#684
#689
#698
#7
#70
#704
#708
#71
#712
#713
#715
#719
#72
#721
#723
#725
#727
#73
#733
#737
#738
#740
#741
#743
#745
#746
#748
#75
#750
#76
#761
#762
#767
#768
#770
#773
#783
#785
#789
#791
#793
#794
#795
#797
#798
#799
#8
#803
#805
#806
#809
#810
#812
#82
#820
#821
#822
#823
#829
#831
#836
#838
#84
#842
#848
#85
#851
#852
#853
#854
#858
#86
#860
#862
#867
#868
#87
#872
#88
#881
#884
#886
#889
#89
#892
#893
#898
#9
#90
#900
#901
#905
#906
#91
#910
#911
#918
#921
#93
#930
#932
#935
#94
#941
#947
#95
#950
#951
#953
#954
#957
#958
#96
#963
#964
#966
#968
#97
#971
#977
#981
#986
#989
#990
#993
#994
#999
v0.2.0
v0.2.1
v0.3.0
v0.3.1
v0.3.2
v0.4.0
v0.4.1
v0.4.2
v0.4.3
v0.5.0
v0.6.0
v0.7.0
v0.8.0
v0.8.1
v0.8.2
v0.9.0
v0.9.1
v0.9.2
v0.9.3
v0.9.4
v1.0.0
v1.0.1
v1.0.2
v1.0.3
v1.1.0
v1.1.1
v1.2.0
v1.3.0
v1.3.1
v1.3.2
v1.3.3
v1.3.4
v1.4.0
v1.4.1
v1.4.2
v1.4.3
v1.4.4
v1.4.5
v2.0.0
v2.0.1
v2.0.2
v2.0.3
v2.0.4
v2.1.0
v2.1.1
v2.2.0
v2.3.0
v2.3.1
v2.4.0
Select branches
Hide Pull Requests
add-chat-response-format
add-google-cloud-provider
add-quickstart-script
add-rotary-embed-tests
add-small-ttft-script
add-test-for-warmup-and-kvcache
add_api_key
add_batch_dimension
add_gptq_docs
add_integration_test
add_readme_dashboard
add_tunable_prefill
adding_docs
adjust-where-request-max-tokens-is-defaulted
amd-ci-fx
auto_length
auto_max_prefill
automodel-supports-flash-paged-attention
avoid-cuda-graph-during-warmup-if-oom
avoid-zero-seed
backends/trtllm
backends/trtllm-executor
bnb4
bugfix/add_tools_prompt
bugfix/moe-kernels-imports
bugfix/phi-exl2
bump-client-0.6.2
bump-poetry-and-requirements
ci-amihalik-update-chat-completion-messages
ci-new-cluster
ci-patch
ci-run-openai-function-calling-compatible-support
ci-xpu
ci-xpu2
ci2
ci_amd
ci_amd2
ci_amd3
ci_amd4
close_dl_thread
compat_logger
cuda_ipc_allreduce
debug-gpt2
debug-request-id
debug-torch-23
debug/gemma2
deploy/aml
dev
development-guide
docs/readme-nix
dummy
enable-non-grammar-constrained-tools
enable-qwen2vl-video
enable_non_divisible_embeddings
exl2
experiment/moe
explore-static-triton-kernels
explore-t4-gemma-issues
feat-backend-llamacpp
feat/add-load-test
feat/attention_sinks
feat/backend_abstraction
feat/backend_feature
feat/better_tokens
feat/ci-benchmarks
feat/cuda_12
feat/flash_decoding
feat/improve_max_tokens
feat/limit
feat/max_queue_size
feat/page_re_alloc
feat/parse_logs
feat/support_deepspeed
feature/compressed-tensors-kv-cache-fp8
feature/machete
feature/moe-kernels
feature/no_repeat_ngram_size
feature/no_repeat_ngram_size_ci
feature/phi-3-small
feature/prefix
feature/radix-prefix-cache
feature/radix-prefix-cache-bench
feature/vlm-prefix-caching
fix-cudagraph-bug
fix-gemma-tokenization
fix-grammar-cleanup-bug
fix-grammar-fsm-batching
fix-main-trtllm
fix-mixtral-adapter-loading
fix-release-tests
fix-repack-for-marlin
fix-version-install
fix/allow-top-p-0
fix/avoid_record_streams
fix/op-trace-id
fix/parse-mamba-config
fix_default_arg
fix_exl2
fix_leak
fix_mistral2
fix_neox_rotary_emb
fix_phi3
flashinfer
fp8_kvcache
fp8_rocm
git_2.0.4
git_v2.1.0
git_v2.1.1
git_v2.2.0
git_v2.3.0
git_v2.3.1
git_v2.4.0
improve-docs
improve-dynamic-message-content
improve-vlm-support
improve_defaults
improve_launcher_defaults
inlcude-latest-release-on-commit-builds-tags
ipex-moe
llama-fused-compiled-mlp
main
maintenance/docker-network
maintenance/merge-vlm-input-prep
mamba2
martinigoyanes-fix-frequency-penalty
medusa
megatron
mi300-temp
mllama
model_compat_log
multi-lora
nix/cargo-clippy
nix/docker2
nix_integration_tests
nix_test2
op-compilation-benchmarking
osanseviero-patch-1
pip-installable
pr-1869-ci-run
pr-2076-ci-run
pr-2290-ci-runner
pr-2366-ci-branch
pr-2444-ci-branch
pr-2517-ci-branch
pr-2711-ci-branch
precompile-kernels-workflow
prefix_chunk
prefix_default
quantization
refactor-lora-linear
remove_post_load_weights
revert
rocm-ci-build
rocm_6.2_fixes
router-grammar-compile
self-generating-docs
set-num-blocks
simpler_exllama
skip-mistral-test
speculative
streaming_conceptual
support-continue-final-message
support-phi-model
support-phi3-small
support-pre-compile-kernels
temp_work
test-batch-speedup-amount
test_docs
test_rocm
tmp_medusa
tmp_torch_compile
trtllm-stop-words
trtllm/cancellation
update_docs2
update_internal_version
update_peft
update_readme
upgrade-outlines
upgrade_mlp_speculator
use_g6
#1
#100
#101
#1010
#1018
#1019
#102
#1022
#1023
#1024
#103
#1033
#1034
#1042
#1044
#1045
#1048
#1049
#1052
#1054
#1058
#1059
#106
#1060
#1061
#1063
#1064
#1065
#1066
#1068
#107
#1070
#1071
#1075
#1076
#1077
#108
#1080
#1081
#1089
#109
#1090
#1091
#1092
#1094
#1096
#1097
#1099
#11
#110
#1100
#1101
#1102
#1103
#1105
#1110
#1112
#1116
#1123
#1128
#1134
#114
#1140
#1141
#115
#1153
#1155
#116
#1165
#1165
#117
#1173
#1176
#1178
#1179
#118
#1182
#1183
#1184
#1187
#119
#1198
#1202
#1211
#1214
#1219
#122
#1224
#1228
#123
#1239
#1241
#1242
#1243
#1246
#1252
#126
#1260
#1267
#1270
#1272
#1274
#1276
#1279
#128
#1285
#1287
#129
#1294
#1295
#13
#130
#1301
#1305
#1307
#1308
#1313
#132
#1326
#1328
#133
#1336
#1337
#134
#1341
#1343
#1346
#1347
#1348
#135
#1351
#1352
#1353
#1358
#136
#1361
#1364
#137
#1370
#1373
#138
#1381
#1386
#139
#1390
#1395
#14
#140
#1408
#141
#1414
#1419
#142
#1420
#1424
#1425
#1427
#1428
#143
#1436
#144
#1442
#1448
#145
#1450
#1453
#1454
#1455
#1459
#1461
#1462
#1463
#1469
#147
#1470
#1471
#1473
#1475
#1476
#1477
#1478
#148
#1480
#1484
#1486
#1488
#1489
#149
#1490
#1491
#1492
#1494
#1495
#1496
#1497
#1498
#15
#150
#1502
#1504
#1505
#1506
#151
#1511
#1512
#1514
#1515
#1516
#1517
#1518
#152
#1520
#1523
#1524
#1526
#1527
#153
#1532
#1533
#1537
#1539
#154
#1540
#1541
#1542
#1543
#155
#1550
#1552
#1555
#1556
#1557
#1560
#1563
#1564
#1567
#1568
#1569
#1570
#1571
#1576
#1577
#1578
#1579
#1580
#1583
#1584
#1585
#1586
#1587
#1588
#159
#1591
#1592
#1594
#16
#160
#1603
#1605
#1606
#1607
#1608
#1609
#161
#1610
#1614
#1617
#1618
#1619
#162
#1621
#1626
#1628
#163
#1632
#1637
#1638
#1639
#164
#1646
#1648
#1650
#1651
#1653
#1658
#1660
#1662
#1663
#1664
#1666
#1667
#1668
#167
#1676
#168
#1682
#1685
#1686
#1693
#1697
#1698
#17
#170
#1702
#1703
#1704
#1707
#1708
#1709
#1710
#1713
#1714
#1715
#1716
#1718
#1719
#1726
#1727
#1729
#173
#1730
#1731
#1734
#1735
#1736
#1737
#1739
#174
#1740
#1747
#1748
#1749
#175
#1751
#1755
#1756
#1758
#1759
#1760
#1764
#1765
#1767
#1768
#1773
#1774
#1775
#178
#1784
#1789
#179
#1790
#1791
#1797
#1798
#18
#180
#1800
#1801
#1808
#181
#1811
#1812
#1813
#1815
#1816
#1817
#1818
#1820
#1825
#1827
#1828
#1829
#183
#1830
#1832
#1833
#1835
#1836
#1839
#184
#1840
#1841
#1843
#1844
#1845
#1848
#1849
#185
#1850
#1851
#1854
#1855
#186
#1860
#1865
#1866
#1869
#187
#1882
#1884
#1886
#1888
#1889
#1890
#1892
#1894
#1895
#1898
#19
#190
#1902
#1906
#1908
#1909
#191
#1910
#1912
#1915
#1916
#1917
#1918
#1919
#1920
#1921
#1923
#1924
#1925
#1929
#193
#1931
#1932
#1934
#1935
#1936
#1937
#1938
#1939
#194
#1940
#1942
#1947
#1948
#1949
#1950
#1951
#1953
#1954
#1958
#1959
#196
#1963
#1965
#1967
#1970
#1971
#1975
#1980
#1981
#1985
#1986
#1988
#1989
#1990
#1994
#1995
#1996
#2
#20
#200
#2002
#2003
#2004
#2005
#2006
#2008
#201
#2010
#2011
#2013
#2014
#2015
#2016
#2017
#202
#2020
#2021
#2022
#2023
#2024
#2028
#203
#2031
#2032
#2033
#2034
#2038
#2039
#2044
#2045
#2046
#2047
#2049
#205
#2050
#2052
#2054
#2059
#2060
#2061
#2062
#2063
#2065
#2066
#2068
#207
#2071
#2072
#2074
#2075
#2076
#2078
#2079
#208
#2080
#2084
#2085
#2086
#2087
#2088
#2089
#2091
#2092
#2094
#2095
#2097
#2099
#210
#2101
#2102
#2103
#2104
#2105
#2109
#2110
#2111
#2114
#2115
#2116
#2117
#2118
#2119
#212
#2120
#2123
#2124
#2125
#2127
#2128
#2129
#213
#2131
#2132
#2133
#2134
#2135
#2137
#2138
#214
#2140
#2141
#2142
#2148
#2149
#215
#2151
#2152
#2153
#2155
#2156
#2158
#216
#2161
#2163
#2164
#2165
#2166
#2167
#2168
#2169
#217
#2170
#2173
#2175
#2176
#2178
#2179
#218
#2180
#2181
#2182
#2186
#2187
#2189
#219
#2190
#2191
#2193
#2194
#2196
#22
#220
#2201
#2202
#2203
#2204
#2205
#2208
#2209
#221
#2212
#2213
#2215
#2216
#2217
#222
#2220
#2221
#2224
#2225
#2226
#2228
#2230
#2231
#2232
#2233
#2237
#2242
#2243
#2244
#2245
#2248
#2249
#2250
#2251
#2254
#2255
#2256
#2257
#2258
#2259
#226
#2260
#2261
#2262
#2266
#2268
#2269
#227
#2271
#2272
#2273
#2274
#2276
#2277
#2278
#2279
#228
#2281
#2282
#2283
#2284
#2285
#2286
#2287
#2288
#2289
#2290
#2291
#2292
#2298
#2299
#23
#2300
#2303
#2304
#2306
#2307
#2308
#2309
#2311
#2311
#2313
#2314
#2315
#2317
#2320
#2323
#2325
#2326
#2327
#2328
#2329
#233
#2330
#2331
#2333
#2335
#2336
#2337
#2338
#2339
#234
#2341
#2342
#2343
#2344
#2345
#2346
#2347
#235
#2350
#2351
#2352
#2353
#2354
#2357
#2358
#2359
#2360
#2361
#2364
#2365
#2366
#2367
#2368
#237
#2370
#2371
#2372
#2374
#2377
#2378
#2379
#2381
#2382
#2384
#2385
#2386
#2387
#2389
#2390
#2391
#2392
#2394
#2395
#2396
#2397
#2398
#2399
#24
#2401
#2402
#2403
#2404
#2405
#2406
#2407
#2408
#2410
#2411
#2412
#2414
#2415
#2416
#2419
#242
#2420
#2422
#2423
#2424
#2426
#2427
#2428
#2429
#2430
#2431
#2433
#2437
#2437
#2438
#2439
#244
#2442
#2443
#2444
#2449
#2450
#2451
#2453
#2454
#2455
#2459
#246
#2462
#2463
#2468
#2469
#2470
#2471
#2472
#2473
#2477
#2478
#2479
#248
#2481
#2482
#2484
#2486
#2489
#2490
#2491
#2492
#2493
#2494
#2496
#2497
#2498
#2499
#25
#250
#2500
#2501
#2507
#251
#2510
#2511
#2512
#2513
#2514
#2515
#2516
#2517
#2518
#2519
#252
#2520
#2521
#2524
#2525
#2527
#2528
#2529
#2532
#2533
#2535
#2536
#2537
#2538
#2539
#2540
#2545
#2546
#2547
#2548
#255
#2550
#2551
#2552
#2553
#2554
#2555
#2556
#2557
#2558
#2561
#2562
#2563
#2566
#2567
#2568
#257
#2574
#2575
#2577
#2578
#2579
#258
#2580
#2582
#2585
#2586
#2587
#2588
#2589
#259
#2590
#2591
#2591
#2592
#2594
#2595
#2596
#2597
#2599
#26
#2600
#2601
#2602
#2603
#2604
#2605
#2606
#2607
#2609
#261
#2610
#2611
#2612
#2612
#2614
#2616
#2617
#2619
#262
#2620
#2622
#2623
#2625
#2627
#2628
#2629
#2630
#2631
#2632
#2633
#2634
#2634
#2637
#264
#2640
#2642
#2642
#2645
#2646
#2647
#2648
#2648
#2650
#2651
#2652
#2655
#2658
#2659
#266
#2661
#2663
#2664
#2665
#2666
#2668
#267
#2673
#2674
#2677
#2678
#2680
#2682
#2683
#2684
#2685
#2686
#2687
#2688
#2689
#269
#2690
#2691
#2692
#2693
#2694
#2695
#2697
#2698
#2699
#27
#2701
#2702
#2704
#2706
#2707
#2708
#2709
#2710
#2711
#2712
#2713
#2714
#2716
#2717
#2718
#2719
#272
#2720
#2721
#2723
#2723
#2724
#2725
#2726
#2726
#2732
#2733
#2733
#2738
#274
#2740
#2741
#2742
#2743
#2745
#2746
#275
#2750
#2751
#2753
#2754
#2754
#2755
#2756
#2756
#2758
#276
#2760
#2761
#2761
#2762
#2762
#2764
#2765
#2766
#2767
#2767
#2768
#277
#278
#28
#282
#284
#285
#286
#287
#29
#292
#294
#297
#298
#299
#30
#302
#303
#304
#305
#308
#31
#310
#313
#317
#318
#32
#325
#327
#328
#329
#33
#334
#335
#336
#34
#340
#341
#343
#344
#348
#35
#351
#352
#353
#356
#357
#358
#359
#36
#360
#362
#363
#364
#367
#368
#37
#370
#373
#379
#384
#385
#388
#39
#393
#394
#395
#396
#4
#40
#400
#404
#406
#407
#41
#411
#412
#42
#434
#438
#44
#441
#443
#45
#453
#46
#462
#465
#47
#470
#472
#475
#477
#48
#480
#483
#485
#488
#49
#498
#5
#50
#501
#502
#51
#513
#514
#516
#519
#52
#520
#521
#522
#525
#529
#53
#534
#54
#543
#544
#545
#55
#550
#553
#557
#558
#56
#561
#562
#567
#57
#575
#578
#579
#58
#580
#581
#582
#583
#585
#586
#587
#588
#59
#590
#595
#596
#6
#60
#600
#605
#608
#609
#61
#611
#616
#617
#618
#619
#62
#621
#623
#624
#626
#63
#630
#633
#634
#635
#639
#64
#642
#643
#647
#648
#659
#66
#661
#664
#665
#666
#67
#670
#671
#678
#68
#684
#689
#698
#7
#70
#704
#708
#71
#712
#713
#715
#719
#72
#721
#723
#725
#727
#73
#733
#737
#738
#740
#741
#743
#745
#746
#748
#75
#750
#76
#761
#762
#767
#768
#770
#773
#783
#785
#789
#791
#793
#794
#795
#797
#798
#799
#8
#803
#805
#806
#809
#810
#812
#82
#820
#821
#822
#823
#829
#831
#836
#838
#84
#842
#848
#85
#851
#852
#853
#854
#858
#86
#860
#862
#867
#868
#87
#872
#88
#881
#884
#886
#889
#89
#892
#893
#898
#9
#90
#900
#901
#905
#906
#91
#910
#911
#918
#921
#93
#930
#932
#935
#94
#941
#947
#95
#950
#951
#953
#954
#957
#958
#96
#963
#964
#966
#968
#97
#971
#977
#981
#986
#989
#990
#993
#994
#999
v0.2.0
v0.2.1
v0.3.0
v0.3.1
v0.3.2
v0.4.0
v0.4.1
v0.4.2
v0.4.3
v0.5.0
v0.6.0
v0.7.0
v0.8.0
v0.8.1
v0.8.2
v0.9.0
v0.9.1
v0.9.2
v0.9.3
v0.9.4
v1.0.0
v1.0.1
v1.0.2
v1.0.3
v1.1.0
v1.1.1
v1.2.0
v1.3.0
v1.3.1
v1.3.2
v1.3.3
v1.3.4
v1.4.0
v1.4.1
v1.4.2
v1.4.3
v1.4.4
v1.4.5
v2.0.0
v2.0.1
v2.0.2
v2.0.3
v2.0.4
v2.1.0
v2.1.1
v2.2.0
v2.3.0
v2.3.1
v2.4.0
-
e5ad76ae6e
87004ae711
into3c54488638
#2762 Lucain2024-11-21 21:32:59 +0900 -
3c54488638
2024-11-21 13:00:26 +0100 -
2a68d6db09
nix: downgrade to outlines 0.1.3
#2768
Daniël de Kok
2024-11-21 11:21:23 +0000 -
b7c885bf05
6297f1769f
into6ee8d6dd3b
#2726 OlivierDehaene2024-11-21 11:12:01 +0100 -
8aefa4ca8c
af546505ad
into6ee8d6dd3b
#2642 Drew Paettie2024-11-21 10:12:51 +0100 -
9809628563
56e3b65c46
into6ee8d6dd3b
#2767 Daniël de Kok2024-11-21 10:00:56 +0100 -
56e3b65c46
Add a README section about using Nix
#2767
docs/readme-nix
Daniël de Kok
2024-11-21 08:53:16 +0000 -
42554dc9de
5f52e2e38e
into6ee8d6dd3b
#2754 Wang, Yi2024-11-21 10:40:21 +0800 -
6ee8d6dd3b
2024-11-20 18:09:39 -0500 -
c57c6856ab
5335bf973b
into07bed530f7
#2723 Funtowicz Morgan2024-11-20 23:03:12 +0000 -
5335bf973b
feat(backend): multistream inference on CPU
#2723
feat-backend-llamacpp
Morgan Funtowicz
2024-11-21 00:03:05 +0100 -
5b419b889c
0fd2ab3e89
into07bed530f7
#2756 drbh2024-11-20 17:01:38 -0500 -
77dc0c266e
fa577c9be2
into07bed530f7
#2733 drbh2024-11-20 17:01:25 -0500 -
613fa03b63
fix: set outlines version to 0.1.3 to avoid bug
#2766
drbh
2024-11-20 16:57:08 -0500 -
07bed530f7
2024-11-20 20:56:11 +0100 -
aa46309f8d
Fix Nix build, disable pure shell (covered by Nix tests)
#2765
Daniël de Kok
2024-11-20 19:30:30 +0000 -
45c6ae6dd3
nix: add poetry to the impure shell
Daniël de Kok
2024-11-20 18:59:17 +0000 -
98db89b8b6
nix: build and cache all devshells
Daniël de Kok
2024-11-20 18:43:31 +0000 -
46a5a7e73e
2024-11-20 18:25:23 +0100 -
2fda8845a7
2024-11-20 18:24:29 +0100 -
80cfe1b16c
nix: update for outlines 0.1.4
#2764
Daniël de Kok
2024-11-20 16:17:12 +0000 -
305c743a48
74a8a820ad
into45013b60a4
#2761 Daniël de Kok2024-11-20 14:26:04 +0000 -
74a8a820ad
Use FP8 KV cache when specified by compressed-tensors
#2761
feature/compressed-tensors-kv-cache-fp8
Daniël de Kok
2024-11-20 12:31:47 +0000 -
45013b60a4
Install compressed-tensors in Docker CPU builds
Daniël de Kok
2024-11-20 14:17:47 +0000 -
87004ae711
2024-11-20 13:47:59 +0100 -
5dcae67ac4
14e8ca5236
intobd6e8b3c13
#2648 Hyeongchan Kim2024-11-20 17:55:45 +0900 -
5f52e2e38e
entries.len() could > batch.size in prefill, so need to filter as well.
#2754
Wang, Yi A
2024-11-19 23:27:45 -0800 -
fa577c9be2
fix: remove continue_final_message chat request param
#2733
support-continue-final-message
David Holtz
2024-11-19 21:24:18 +0000 -
bd6e8b3c13
2024-11-19 15:10:22 -0500 -
91fe29c1b1
fix: adjust llama MLP name from dense to mlp to correctly apply lora
#2760
drbh
2024-11-19 14:51:46 -0500 -
5489406c4a
2024-11-19 13:31:59 -0500 -
40cc9f4c79
05d68ae5c2
into2007a9473a
#2634 Linus Bierhoff2024-11-19 11:02:43 -0500 -
070af963f8
Add support for wNa16 int 2:4 compressed-tensors checkpoints
#2758
Daniël de Kok
2024-11-19 13:49:11 +0000 -
2007a9473a
2024-11-19 14:55:29 +0100 -
2b9d692831
Update to moe-kernels 0.7.0
#2720
Daniël de Kok
2024-11-04 15:04:04 +0000 -
b4ec427ad0
2024-11-19 08:04:23 +0100 -
d2581ed606
feat(backend): remove all the logs from hardware.hpp
trtllm/cancellation
Morgan Funtowicz
2024-11-19 00:19:22 +0100 -
f24e9fa2b9
feat(backend): initial rewrite of the backend for simplicity
Morgan Funtowicz
2024-11-19 00:17:35 +0100 -
a80c346f72
test(ctest) enable address sanitizer
Morgan Funtowicz
2024-11-19 00:17:10 +0100 -
0fd2ab3e89
fix: remove unused deps and imports
#2756
enable-qwen2vl-video
David Holtz
2024-11-18 21:48:09 +0000 -
e428a14d19
fix: add protobuf update and mp4parse dep
David Holtz
2024-11-18 21:22:19 +0000 -
a9c2d28a3a
feat: support video input chunks and enable qwen2 vl to process video
David Holtz
2024-11-18 21:16:21 +0000 -
34920ab9fa
feat(hardware) enable new hardware.hpp and unittests
Morgan Funtowicz
2024-11-18 21:51:44 +0100 -
03de92b38b
misc(cmake) update dependencies
Morgan Funtowicz
2024-11-18 21:50:44 +0100 -
6b4697e9d1
fix
Miquel Farre
2024-11-14 13:40:04 +0000 -
cee1dea803
refactoring
Miquel Farre
2024-11-14 12:30:59 +0000 -
f7cf45dfde
fix
Miquel Farre
2024-11-14 11:54:10 +0000 -
bd04258e2c
downloading videos
Miquel Farre
2024-11-14 11:36:11 +0000 -
f9ee2500cf
fix
Miquel Farre
2024-11-14 11:20:23 +0000 -
b4e096c080
connecting video to qwen2
Miquel Farre
2024-11-14 10:48:18 +0000 -
da644c21e5
adopting video url
Miquel Farre
2024-11-13 13:37:17 +0000 -
fc5b0ac1fd
router changes
Miquel Farre
2024-11-13 12:42:11 +0000 -
de6c68443e
WIP video support
Miquel Farre
2024-11-11 14:52:52 +0000 -
38cff84a3e
2024-11-18 12:46:40 -0500 -
3c9df21ff8
2024-11-18 17:20:31 +0100 -
c6393c5512
Simplify two ipex conditions
#2755
Daniël de Kok
2024-11-18 16:18:59 +0000 -
a5ecd6e586
2024-11-19 00:16:55 +0800 -
70409f09f4
fix: calc max_seqlen once and small refactors
#2721
David Holtz
2024-11-18 15:34:08 +0000 -
fea62e928f
2024-11-18 09:51:06 -0500 -
05f98efc9d
Small fixes
#2745
Daniël de Kok
2024-11-18 14:49:59 +0000 -
3eb6c1ccf8
2024-11-18 15:45:52 +0100 -
734fd1310b
6ae04672b6
into52e48739a5
#2591 Hugo Larcher2024-11-18 09:25:54 -0500 -
e0018723fc
Use marlin-kernels 0.3.5
Daniël de Kok
2024-11-18 12:43:12 +0000 -
53b6f6e604
2024-11-18 19:28:07 +0800 -
f76c0ff17f
Always use dynamic input quantization for w8a8 int
Daniël de Kok
2024-11-18 10:54:51 +0000 -
b2dc10aea5
Add support for compressed-tensors w8a8 int checkpoints
Daniël de Kok
2024-11-14 11:00:29 +0000 -
e0e39fa0d9
2024-11-18 09:45:05 +0800 -
52e48739a5
2024-11-17 17:34:50 +0100 -
6489f85269
2024-11-15 08:49:19 -0500 -
d8f1203bcb
2024-11-15 14:48:23 +0100 -
110d154777
Fix clippy warning
#2751
Daniël de Kok
2024-11-15 13:44:26 +0000 -
5d9613e0c5
2024-11-15 14:27:16 +0100 -
df72deac26
2024-11-15 20:24:47 +0700 -
22d205aa47
2024-10-25 11:55:44 -0400 -
a9c8c6a0d7
2024-10-25 14:35:25 +0000 -
21378b325b
2024-10-22 11:59:14 -0400 -
0ae84e5473
2024-10-22 09:53:15 -0400 -
84cd8434b0
2024-10-18 14:15:27 -0400 -
dfc00f7fb3
Remove vLLM dependency for CUDA
Daniël de Kok
2024-11-15 12:31:30 +0000 -
34a3bdedc3
2024-11-15 21:03:27 +0800 -
b52d6332e4
2024-11-15 13:45:22 +0100 -
8dffe1ca08
2024-11-15 13:33:47 +0100 -
1623a56544
2024-11-15 13:26:06 +0100 -
4580ced091
2024-11-15 07:22:52 -0500 -
003eaec0fb
2024-11-15 21:21:50 +0900 -
4f4857a4ac
2024-11-15 16:16:15 +0400 -
f9ee46f740
2024-11-15 16:15:36 +0400 -
8442f1ac85
2024-11-15 13:14:55 +0100 -
ca4f46ddfc
2024-11-14 18:48:20 +0100 -
c908aab440
nix: update nixpkgs
#2746
Daniël de Kok
2024-11-14 16:33:04 +0000 -
23d2bcf28d
misc(build): improve build process
Morgan Funtowicz
2024-11-14 09:38:13 +0100 -
70c90ad933
feat(backend): update llamacpp to 4077
Morgan Funtowicz
2024-11-14 09:04:06 +0100 -
6f059c4b5d
feat(backend): wrap Arc tokenizer to avoid duplicating
Morgan Funtowicz
2024-11-14 08:41:38 +0100 -
57b215467b
feat(backend): simplify Rust callback
Morgan Funtowicz
2024-11-13 00:22:11 +0100 -
daf1631e09
dockerfile(backend): initial working version of llama.cpp container
Morgan Funtowicz
2024-11-13 00:08:49 +0100 -
02cd6fe427
chore(backend): minor improvements
Morgan Funtowicz
2024-11-13 00:08:26 +0100 -
363d5e45de
feat(backend): use std::ranges to map uint32_t to llama_token
Morgan Funtowicz
2024-11-13 00:07:59 +0100 -
488ba93898
feat(backend): fix invalid reference to context in release mode
Morgan Funtowicz
2024-11-11 19:50:33 +0100 -
7e2890fe2c
feat(backend): remove unused function
Morgan Funtowicz
2024-11-11 19:50:11 +0100 -
6915fa3441
feat(backend): remove reinterpret_cast converting from uint32_t to llama_token(int32_t)
Morgan Funtowicz
2024-11-09 22:19:38 +0100