API Documentation
This page provides an overview of Stringy’s public API. For complete API documentation, run cargo doc --open in the project directory.
Core Types
FoundString
The primary data structure representing an extracted string with metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FoundString {
/// The extracted string text
pub text: String,
/// Pre-demangled form (if symbol was demangled)
pub original_text: Option<String>,
/// The encoding used for this string
pub encoding: Encoding,
/// File offset where the string was found
pub offset: u64,
/// Relative Virtual Address (if available)
pub rva: Option<u64>,
/// Section name where the string was found
pub section: Option<String>,
/// Length of the string in bytes
pub length: u32,
/// Semantic tags applied to this string
pub tags: Vec<Tag>,
/// Relevance score for ranking
pub score: i32,
/// Section weight component of score (debug only)
pub section_weight: Option<i32>,
/// Semantic boost component of score (debug only)
pub semantic_boost: Option<i32>,
/// Noise penalty component of score (debug only)
pub noise_penalty: Option<i32>,
/// Display score 0-100, populated by ScoreNormalizer in all non-raw executions
pub display_score: Option<i32>,
/// Source of the string (section data, import, etc.)
pub source: StringSource,
/// UTF-16 confidence score
pub confidence: f32,
}
Encoding
Supported string encodings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Encoding {
Ascii,
Utf8,
Utf16Le,
Utf16Be,
}
Tag
Semantic classification tags.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Tag {
Url,
Domain,
IPv4,
IPv6,
FilePath,
RegistryPath,
Guid,
Email,
Base64,
FormatString,
UserAgent,
DemangledSymbol,
Import,
Export,
Version,
Manifest,
Resource,
DylibPath,
Rpath,
RpathVariable,
FrameworkPath,
}
EncodingFilter
Filter for restricting output by string encoding, corresponding to the --enc CLI flag.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EncodingFilter {
/// Match a specific encoding exactly
Exact(Encoding),
/// Match any UTF-16 variant (UTF-16LE or UTF-16BE)
Utf16Any,
}
Used with FilterConfig to limit results to a specific encoding. Utf16Any matches both Utf16Le and Utf16Be.
FilterConfig
Post-extraction filtering configuration. All fields have sensible defaults; empty tag vectors are no-ops.
pub struct FilterConfig {
/// Minimum string length to include (default: 4)
pub min_length: usize, // --min-len
/// Restrict to a specific encoding
pub encoding: Option<EncodingFilter>, // --enc
/// Only include strings with these tags (empty = no filter)
pub include_tags: Vec<Tag>, // --only-tags
/// Exclude strings with these tags (empty = no filter)
pub exclude_tags: Vec<Tag>, // --no-tags
/// Limit output to top N strings by score
pub top_n: Option<usize>, // --top
}
Builder-style construction:
let config = FilterConfig::new()
.with_min_length(6)
.with_encoding(EncodingFilter::Exact(Encoding::Utf8))
.with_include_tags(vec![Tag::Url, Tag::Domain])
.with_top_n(20);
Main API Functions
BasicExtractor::extract
Extract strings from binary data using the BasicExtractor, which implements the StringExtractor trait.
pub trait StringExtractor {
fn extract(
&self,
data: &[u8],
container_info: &ContainerInfo,
config: &ExtractionConfig,
) -> Result<Vec<FoundString>>;
}
Parameters:
- data: Binary data to analyze
- container_info: Parsed container metadata (sections, imports, exports)
- config: Extraction configuration options
Returns:
Result<Vec<FoundString>>: Extracted strings with metadata
Example:
use stringy::{BasicExtractor, ExtractionConfig, StringExtractor};
use stringy::container::{detect_format, create_parser};
let data = std::fs::read("binary.exe")?;
let format = detect_format(&data);
let parser = create_parser(format)?;
let container_info = parser.parse(&data)?;
let extractor = BasicExtractor::new();
let config = ExtractionConfig::default();
let strings = extractor.extract(&data, &container_info, &config)?;
for string in strings {
println!("{}: {}", string.score, string.text);
}
detect_format
Detect the binary format of the given data.
pub fn detect_format(data: &[u8]) -> BinaryFormat
Parameters:
data: Binary data to analyze
Returns:
BinaryFormat: Detected format (ELF, PE, MachO, or Unknown)
Example:
use stringy::detect_format;
let data = std::fs::read("binary")?;
let format = detect_format(&data);
println!("Detected format: {:?}", format);
Configuration
ExtractionConfig
Configuration options for string extraction. The struct has 16 fields with sensible defaults.
pub struct ExtractionConfig {
/// Minimum string length in bytes (default: 1)
pub min_length: usize,
/// Maximum string length in bytes (default: 4096)
pub max_length: usize,
/// Whether to scan executable sections (default: true)
pub scan_code_sections: bool,
/// Whether to include debug sections (default: false)
pub include_debug: bool,
/// Section types to prioritize (default: StringData, ReadOnlyData, Resources)
pub section_priority: Vec<SectionType>,
/// Whether to include import/export names (default: true)
pub include_symbols: bool,
/// Minimum length for ASCII strings (default: 1)
pub min_ascii_length: usize,
/// Minimum length for UTF-16 strings (default: 1)
pub min_wide_length: usize,
/// Which encodings to extract (default: ASCII, UTF-8)
pub enabled_encodings: Vec<Encoding>,
/// Enable/disable noise filtering (default: true)
pub noise_filtering_enabled: bool,
/// Minimum confidence threshold (default: 0.5)
pub min_confidence_threshold: f32,
/// Minimum UTF-16LE confidence threshold (default: 0.7)
pub utf16_min_confidence: f32,
/// Which UTF-16 byte order(s) to scan (default: Auto)
pub utf16_byte_order: ByteOrder,
/// Minimum UTF-16-specific confidence threshold (default: 0.5)
pub utf16_confidence_threshold: f32,
/// Enable/disable deduplication (default: true)
pub enable_deduplication: bool,
/// Deduplication threshold (default: None)
pub dedup_threshold: Option<usize>,
}
impl Default for ExtractionConfig {
fn default() -> Self {
Self {
min_length: 1,
max_length: 4096,
scan_code_sections: true,
include_debug: false,
section_priority: vec![
SectionType::StringData,
SectionType::ReadOnlyData,
SectionType::Resources,
],
include_symbols: true,
min_ascii_length: 1,
min_wide_length: 1,
enabled_encodings: vec![Encoding::Ascii, Encoding::Utf8],
noise_filtering_enabled: true,
min_confidence_threshold: 0.5,
utf16_min_confidence: 0.7,
utf16_byte_order: ByteOrder::Auto,
utf16_confidence_threshold: 0.5,
enable_deduplication: true,
dedup_threshold: None,
}
}
}
SemanticClassifier
The SemanticClassifier is constructed via SemanticClassifier::new() and currently has no configuration options. Classification patterns are built-in.
Pipeline Components
ScoreNormalizer
Maps internal relevance scores to a 0-100 display scale using band mapping.
let normalizer = ScoreNormalizer::new();
normalizer.normalize(&mut strings);
// Each FoundString now has display_score populated
Invoked unconditionally by the pipeline in all non-raw executions. Negative internal scores map to display_score = 0. See Ranking for the full band-mapping table.
FilterEngine
Applies post-extraction filtering and sorting. Consumes the input vector and returns a filtered, sorted result.
let engine = FilterEngine::new();
let filtered = engine.apply(strings, &filter_config);
Filter order:
1. Minimum length (min_length)
2. Encoding match (encoding)
3. Include tags (include_tags – keep only strings with at least one matching tag)
4. Exclude tags (exclude_tags – remove strings with any matching tag)
5. Stable sort by score (descending), then offset (ascending), then text (ascending)
6. Top-N truncation (top_n)
Example: FilterConfig + FilterEngine
use stringy::{FilterConfig, FilterEngine, EncodingFilter, Encoding, Tag};
let config = FilterConfig::new()
.with_min_length(6)
.with_include_tags(vec![Tag::Url, Tag::Domain])
.with_top_n(10);
let engine = FilterEngine::new();
let results = engine.apply(strings, &config);
// results contains at most 10 strings, all >= 6 chars,
// all tagged Url or Domain, sorted by score descending
Container Parsing
ContainerParser Trait
Trait for implementing binary format parsers.
pub trait ContainerParser {
/// Detect if this parser can handle the given data
fn detect(data: &[u8]) -> bool
where
Self: Sized;
/// Parse the container and extract metadata
fn parse(&self, data: &[u8]) -> Result<ContainerInfo>;
}
ContainerInfo
Information about a parsed binary container.
pub struct ContainerInfo {
/// The binary format detected
pub format: BinaryFormat,
/// List of sections in the binary
pub sections: Vec<SectionInfo>,
/// Import information
pub imports: Vec<ImportInfo>,
/// Export information
pub exports: Vec<ExportInfo>,
/// Resource metadata (PE format only)
pub resources: Option<Vec<ResourceMetadata>>,
}
SectionInfo
Information about a section within the binary.
pub struct SectionInfo {
/// Section name
pub name: String,
/// File offset of the section
pub offset: u64,
/// Size of the section in bytes
pub size: u64,
/// Relative Virtual Address (if available)
pub rva: Option<u64>,
/// Classification of the section type
pub section_type: SectionType,
/// Whether the section is executable
pub is_executable: bool,
/// Whether the section is writable
pub is_writable: bool,
/// Weight indicating likelihood of containing meaningful strings (1.0-10.0)
pub weight: f32,
}
Output Formatting
OutputFormatter Trait
Trait for implementing output formatters.
pub trait OutputFormatter {
/// Returns the name of this formatter
fn name(&self) -> &'static str;
/// Format the strings for output
fn format(&self, strings: &[FoundString], metadata: &OutputMetadata) -> Result<String>;
}
Built-in Formatters
The library provides free functions rather than formatter structs:
- format_table(strings, metadata) - Human-readable table format (TTY-aware)
- format_json(strings, metadata) - JSONL format
- format_yara(strings, metadata) - YARA rule format
- format_output(strings, metadata) - Dispatches based on metadata.output_format
Example:
use stringy::output::{format_json, OutputMetadata};
let metadata = OutputMetadata::new("binary.exe".to_string());
let output = format_json(&strings, &metadata)?;
println!("{}", output);
Error Handling
StringyError
Comprehensive error type for the library.
#[derive(Debug, thiserror::Error)]
pub enum StringyError {
#[error("Unsupported file format (supported: ELF, PE, Mach-O)")]
UnsupportedFormat,
#[error("File I/O error: {0}")]
IoError(#[from] std::io::Error),
#[error("Binary parsing error: {0}")]
ParseError(String),
#[error("Invalid encoding in string at offset {offset}")]
EncodingError { offset: u64 },
#[error("Configuration error: {0}")]
ConfigError(String),
#[error("Serialization error: {0}")]
SerializationError(String),
#[error("Validation error: {0}")]
ValidationError(String),
#[error("Memory mapping error: {0}")]
MemoryMapError(String),
}
Result Type
Convenient result type alias.
pub type Result<T> = std::result::Result<T, StringyError>;
Advanced Usage
Custom Classification
Implement custom semantic classifiers:
use stringy::classification::{ClassificationResult, Classifier};
pub struct CustomClassifier {
// Custom implementation
}
impl Classifier for CustomClassifier {
fn classify(&self, text: &str, context: &StringContext) -> Vec<ClassificationResult> {
// Custom classification logic
vec![]
}
}
Memory-Mapped Files
For large files, use memory mapping via mmap-guard:
let data = mmap_guard::map_file(path)?;
// data implements Deref<Target = [u8]>
Note: The Pipeline::run API handles memory mapping automatically. Direct use of mmap_guard is only needed when using lower-level APIs.
Parallel Processing
Parallel processing is not yet implemented. Stringy currently processes files sequentially. The Pipeline API processes one file at a time.
Feature Flags
Stringy currently has no optional feature flags. All functionality is included by default.
Examples
Basic String Extraction (Pipeline API)
use stringy::pipeline::{Pipeline, PipelineConfig};
use std::path::Path;
fn main() -> stringy::Result<()> {
let config = PipelineConfig::default();
let pipeline = Pipeline::new(config);
pipeline.run(Path::new("binary.exe"))?;
Ok(())
}
Filtered Extraction
use stringy::{BasicExtractor, ExtractionConfig, StringExtractor, Tag};
use stringy::container::{detect_format, create_parser};
fn extract_network_indicators(data: &[u8]) -> stringy::Result<Vec<String>> {
let format = detect_format(data);
let parser = create_parser(format)?;
let container_info = parser.parse(data)?;
let extractor = BasicExtractor::new();
let config = ExtractionConfig::default();
let strings = extractor.extract(data, &container_info, &config)?;
let network_strings: Vec<String> = strings
.into_iter()
.filter(|s| {
s.tags
.iter()
.any(|tag| matches!(tag, Tag::Url | Tag::Domain | Tag::IPv4 | Tag::IPv6))
})
.filter(|s| s.score >= 70)
.map(|s| s.text)
.collect();
Ok(network_strings)
}
Custom Output Format
use serde_json::json;
use stringy::output::{OutputMetadata, OutputFormatter};
use stringy::FoundString;
pub struct CustomFormatter;
impl OutputFormatter for CustomFormatter {
fn name(&self) -> &'static str {
"custom"
}
fn format(&self, strings: &[FoundString], _metadata: &OutputMetadata) -> stringy::Result<String> {
let output = json!({
"total_strings": strings.len(),
"high_confidence": strings.iter().filter(|s| s.score >= 80).count(),
"strings": strings.iter().take(20).collect::<Vec<_>>()
});
Ok(serde_json::to_string_pretty(&output)?)
}
}
For complete API documentation with all methods and implementation details, run:
cargo doc --open