"""Flask application exposing a few Ollama inspection and timing endpoints.

Routes:
    /        static greeting
    /list    models returned by ``ollama.list()``
    /ps      models returned by ``ollama.ps()``
    /time*   result of ``util.time_model`` for a fixed model tag
"""

from typing import Iterable

from flask import Flask
import ollama

from util import time_model

app = Flask(__name__)

# NOTE(review): several response literals in the original spanned multiple
# physical lines inside single-quoted strings, which is not valid Python.
# The same characters are written here with explicit "\n" escapes. If markup
# (e.g. HTML tags) was intended at those positions, confirm and restore it.


def _render(fragments: Iterable[str]) -> str:
    """Surround every fragment with blank lines and join them with newlines."""
    return '\n'.join(f'\n\n{fragment}\n\n' for fragment in fragments)


@app.route('/')
def hello_world() -> str:
    """Return the greeting served at the root URL."""
    return "\n\nHello, World!\n\n"


@app.route('/list')
def test_world() -> str:
    """Describe every model returned by ``ollama.list()``.

    Returns:
        One rendered fragment per reported attribute: the model name, its
        size in MB and, when ``model.details`` is truthy, format, family,
        parameter size and quantization level.
    """
    response: ollama.ListResponse = ollama.list()
    lines = []
    for model in response.models:
        lines.append(f'\n\nName:{model.model}\n\n')
        # Size is divided by 1024 twice to convert it to MB for display.
        lines.append(f'\t Size (MB): {(model.size.real / 1024 / 1024):.2f}')
        details = model.details
        if details:
            lines.extend((
                f'\t Format: {details.format}',
                f'\t Family: {details.family}',
                f'\t Parameter Size: {details.parameter_size}',
                f'\t Quantization Level:{details.quantization_level}',
            ))
    return _render(lines)


@app.route('/ps')
def ps() -> str:
    """Describe every model returned by ``ollama.ps()``.

    Returns:
        One rendered fragment per reported attribute of each model, followed
        by a lone newline fragment that separates consecutive models.
    """
    # Going through the ``ollama`` module avoids a local name ``ps`` that
    # would shadow this view function.
    response: ollama.ProcessResponse = ollama.ps()
    lines = []
    for model in response.models:
        lines.extend((
            f'\n\nModel\n\n: {model.model}',
            f' Digest: {model.digest}',
            f' Expires at: {model.expires_at}',
            f' Size: {model.size}',
            f' Size vram: {model.size_vram}',
            f' Details: {model.details}',
            f' Context length: {model.context_length}',
            '\n',
        ))
    return _render(lines)


@app.route('/time')
def test_time():
    """Return ``time_model('qwen3:0.6b')``."""
    return time_model('qwen3:0.6b')


@app.route('/time17')
def test_time17():
    """Return ``time_model('qwen3:1.7b')``."""
    return time_model('qwen3:1.7b')


@app.route('/time4')
def test_time4():
    """Return ``time_model('qwen3:4b')``."""
    return time_model('qwen3:4b')


@app.route('/time8')
def test_time8():
    """Return ``time_model('qwen3:8b')``."""
    return time_model('qwen3:8b')


@app.route('/time14')
def test_time14():
    """Return ``time_model('qwen3:14b')``."""
    return time_model('qwen3:14b')


# NOTE(review): in the original only the decorator below appears to be
# commented out, so this function is defined but not registered as a route.
# Confirm that this is the intended state.
# @app.route('/time30')
def test_time30():
    """Return ``time_model('qwen3:30b')``."""
    return time_model('qwen3:30b')


if __name__ == '__main__':
    # Listen on all interfaces; the port is given as an integer.
    app.run(host='0.0.0.0', port=7860)