Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions denoiser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,14 @@ struct CompVisVDenoiser : public CompVisDenoiser {
}
};

// X0-prediction variant of CompVisDenoiser.
//
// Only the scaling factors are overridden; everything else is inherited from
// CompVisDenoiser. The scalings are constant and do not depend on sigma.
struct ComVisX0Denoiser : public CompVisDenoiser {
    // Returns the scaling factors as {c_skip, c_out, c_in}, in the same order
    // used by the other X0 denoisers in this file.
    //
    // sigma is accepted to satisfy the overridden interface but is not used.
    // NOTE(review): presumably c_skip = 0 / c_out = 1 means the model output is
    // taken directly as the denoised sample — confirm against the caller.
    std::vector<float> get_scalings(float sigma) override {
        float c_skip = 0.0f;
        float c_out  = 1.0f;
        float c_in   = 1.0f;
        // The original body had no return statement; flowing off the end of a
        // value-returning function is undefined behaviour.
        return {c_skip, c_out, c_in};
    }
};

struct EDMVDenoiser : public CompVisVDenoiser {
float min_sigma = 0.002;
float max_sigma = 120.0;
Expand Down Expand Up @@ -568,6 +576,15 @@ struct DiscreteFlowDenoiser : public Denoiser {
}
};

// X0-prediction variant of DiscreteFlowDenoiser: overrides only the scaling
// factors, which are constant regardless of sigma.
struct DiscreteFlowX0Denoiser : public DiscreteFlowDenoiser {
    // Yields {c_skip, c_out, c_in} = {0, 1, 1}; the sigma argument is ignored.
    std::vector<float> get_scalings(float sigma) override {
        const float skip_scale   = 0.0f;  // c_skip
        const float output_scale = 1.0f;  // c_out
        const float input_scale  = 1.0f;  // c_in
        std::vector<float> scalings{skip_scale, output_scale, input_scale};
        return scalings;
    }
};

// Evaluates exp(mu) / (exp(mu) + (1/t - 1)^sigma).
//
// exp(mu) is computed in single precision, while the power term and the final
// division are carried out in double precision before the result is narrowed
// back to float on return.
float flux_time_shift(float mu, float sigma, float t) {
    const float exp_mu      = std::exp(mu);
    const double inv_t_term = 1.0 / t - 1.0;
    const double powered    = std::pow(inv_t_term, sigma);
    return exp_mu / (exp_mu + powered);
}
Expand Down Expand Up @@ -631,6 +648,15 @@ struct FluxFlowDenoiser : public Denoiser {
}
};

// X0-prediction variant of FluxFlowDenoiser. Everything except the scaling
// factors is inherited from FluxFlowDenoiser.
struct FluxFlowX0Denoiser : public FluxFlowDenoiser {
    // Produces the fixed triple {c_skip, c_out, c_in}; sigma does not
    // influence the result.
    std::vector<float> get_scalings(float sigma) override {
        return {
            0.0f,  // c_skip
            1.0f,  // c_out
            1.0f,  // c_in
        };
    }
};

struct Flux2FlowDenoiser : public FluxFlowDenoiser {
Flux2FlowDenoiser() = default;

Expand Down
13 changes: 12 additions & 1 deletion flux.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ namespace Flux {
Flux(FluxParams params)
: params(params) {
if (params.version == VERSION_CHROMA_RADIANCE) {
std::pair<int, int> kernel_size = {(int)params.patch_size, (int)params.patch_size};
std::pair<int, int> kernel_size = {16, 16};
std::pair<int, int> stride = kernel_size;

blocks["img_in_patch"] = std::make_shared<Conv2d>(params.in_channels,
Expand Down Expand Up @@ -1068,6 +1068,14 @@ namespace Flux {
auto img = pad_to_patch_size(ctx->ggml_ctx, x);
auto orig_img = img;

if (patch_size != 16) {
int ratio = patch_size / 16;
// It's supposed to be using GGML_SCALE_MODE_NEAREST, but this seems more stable
// Maybe the implementation of nearest-neighbor interpolation in ggml behaves differently than the one in PyTorch?
// img = F.interpolate(img, size=(H//2, W//2), mode="nearest")
img = ggml_interpolate(ctx->ggml_ctx, img, W / ratio, H / ratio, C, x->ne[3], GGML_SCALE_MODE_BILINEAR);
}

auto img_in_patch = std::dynamic_pointer_cast<Conv2d>(blocks["img_in_patch"]);

img = img_in_patch->forward(ctx, img); // [N, hidden_size, H/patch_size, W/patch_size]
Expand Down Expand Up @@ -1290,6 +1298,9 @@ namespace Flux {
// not schnell
flux_params.guidance_embed = true;
}
if (tensor_name.find("__32x32__") != std::string::npos) {
flux_params.patch_size = 32;
}
if (tensor_name.find("distilled_guidance_layer.in_proj.weight") != std::string::npos) {
// Chroma
flux_params.is_chroma = true;
Expand Down
9 changes: 8 additions & 1 deletion model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1731,7 +1731,14 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
// tensor_storage.n_dims,
// tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3],
// tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);


if (!tensor->data) {
GGML_ASSERT(ggml_nelements(tensor) == 0);
// avoid crashing the gguf writer by setting a dummy pointer for zero-sized tensors
LOG_DEBUG("setting dummy pointer for zero-sized tensor %s", name.c_str());
tensor->data = ggml_get_mem_buffer(ggml_ctx);
}

*dst_tensor = tensor;

gguf_add_tensor(gguf_ctx, tensor);
Expand Down
19 changes: 15 additions & 4 deletions stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,12 @@ class StableDiffusionGGML {
}
}
} else if (sd_version_is_flux(version)) {
pred_type = FLUX_FLOW_PRED;
if (tensor_storage_map.find("model.diffusion_model.__x0__") != tensor_storage_map.end()) {
pred_type = FLUX_FLOW_X0_PRED;
} else {
pred_type = FLUX_FLOW_PRED;
}

if (flow_shift == INFINITY) {
flow_shift = 1.0f; // TODO: validate
for (const auto& [name, tensor_storage] : tensor_storage_map) {
Expand Down Expand Up @@ -871,6 +876,11 @@ class StableDiffusionGGML {
denoiser = std::make_shared<Flux2FlowDenoiser>();
break;
}
case FLUX_FLOW_X0_PRED: {
LOG_INFO("running in x0-prediction Flux FLOW mode");
denoiser = std::make_shared<FluxFlowX0Denoiser>();
break;
}
default: {
LOG_ERROR("Unknown predition type %i", pred_type);
ggml_free(ctx);
Expand Down Expand Up @@ -1316,9 +1326,9 @@ class StableDiffusionGGML {
uint32_t dim = latents->ne[ggml_n_dims(latents) - 1];

if (preview_mode == PREVIEW_PROJ) {
int64_t patch_sz = 1;
const float(*latent_rgb_proj)[channel] = nullptr;
float* latent_rgb_bias = nullptr;
int64_t patch_sz = 1;
const float (*latent_rgb_proj)[channel] = nullptr;
float* latent_rgb_bias = nullptr;

if (dim == 128) {
if (sd_version_is_flux2(version)) {
Expand Down Expand Up @@ -2424,6 +2434,7 @@ const char* prediction_to_str[] = {
"edm_v",
"sd3_flow",
"flux_flow",
"flux_flow_x0"
"flux2_flow",
};

Expand Down
1 change: 1 addition & 0 deletions stable-diffusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ enum prediction_t {
EDM_V_PRED,
FLOW_PRED,
FLUX_FLOW_PRED,
FLUX_FLOW_X0_PRED,
FLUX2_FLOW_PRED,
PREDICTION_COUNT
};
Expand Down
Loading