# create_sample_dataset.py - Generate Sample Documents for Mini-VersionQA
import os
# Sample corpus for the Mini-VersionQA dataset.
# Maps each output filename to the full text that is written to that file.
# Every document family appears in two or three versions; later versions
# largely repeat the earlier text and flag their differences with markers such
# as **NEW**, **UPDATED** or "REMOVED", and each document ends with a
# version/date footer.
# The document text is runtime data: change it only when the dataset itself
# is meant to change.
SAMPLE_DOCS = {
# Software: Node.js assert module - 3 versions (v20.0, v21.0, v23.0)
"nodejs_assert_v20.0.txt": """# Node.js Assert Module v20.0
The assert module provides a set of assertion functions for testing invariants in your code.
## Overview
The assert module is used for writing tests. It provides functions to verify that your code is working as expected.
## Basic Usage
```javascript
const assert = require('assert');
// Strict equality assertion
assert.strictEqual(1, 1);
// Deep equality assertion
assert.deepStrictEqual({a: 1}, {a: 1});
```
## Available Functions
- assert.ok(value): Tests if value is truthy
- assert.strictEqual(actual, expected): Tests strict equality
- assert.deepStrictEqual(actual, expected): Tests deep equality
- assert.notStrictEqual(actual, expected): Tests strict inequality
- assert.throws(fn): Tests if function throws an error
## Error Messages
When assertions fail, the assert module provides detailed error messages showing:
- The actual value received
- The expected value
- The assertion type that failed
Version: v20.0
Released: 2023-04
""",
"nodejs_assert_v21.0.txt": """# Node.js Assert Module v21.0
The assert module provides a set of assertion functions for testing invariants in your code.
## Overview
The assert module is used for writing tests. It provides functions to verify that your code is working as expected.
## NEW in v21.0: Strict Mode
**MAJOR ADDITION**: The assert module now includes a strict mode by default!
```javascript
const assert = require('assert').strict;
// All assertions now use strict equality by default
assert.equal(1, 1); // Now uses strictEqual internally
```
## Basic Usage
```javascript
const assert = require('assert');
// Strict equality assertion
assert.strictEqual(1, 1);
// Deep equality assertion
assert.deepStrictEqual({a: 1}, {a: 1});
```
## Available Functions
- assert.ok(value): Tests if value is truthy
- assert.strictEqual(actual, expected): Tests strict equality
- assert.deepStrictEqual(actual, expected): Tests deep equality
- assert.notStrictEqual(actual, expected): Tests strict inequality
- assert.throws(fn): Tests if function throws an error
- **NEW**: assert.rejects(promise): Tests if promise rejects (async support)
- **NEW**: assert.strict: Strict mode enabled by default
## Strict Mode Benefits
- Prevents common mistakes with type coercion
- Enforces strict equality checks
- Better error messages for mismatched types
## Error Messages
When assertions fail, the assert module provides detailed error messages showing:
- The actual value received
- The expected value
- The assertion type that failed
- Stack trace for debugging
Version: v21.0
Released: 2023-10
""",
"nodejs_assert_v23.0.txt": """# Node.js Assert Module v23.0
The assert module provides a set of assertion functions for testing invariants in your code.
## Overview
The assert module is used for writing tests. It provides functions to verify that your code is working as expected.
## Strict Mode (Added in v21.0)
The assert module includes a strict mode by default:
```javascript
const assert = require('assert').strict;
// All assertions now use strict equality by default
assert.equal(1, 1); // Uses strictEqual internally
```
## NEW in v23.0: Enhanced Diff Output
**MAJOR IMPROVEMENT**: Better visualization of differences in failed assertions!
```javascript
// Now shows colored diff output for complex objects
assert.deepStrictEqual(
{ user: { name: 'John', age: 30 } },
{ user: { name: 'Jane', age: 30 } }
);
// Output shows highlighted differences with + and - markers
```
## Available Functions
- assert.ok(value): Tests if value is truthy
- assert.strictEqual(actual, expected): Tests strict equality
- assert.deepStrictEqual(actual, expected): Tests deep equality with enhanced diff
- assert.notStrictEqual(actual, expected): Tests strict inequality
- assert.throws(fn): Tests if function throws an error
- assert.rejects(promise): Tests if promise rejects (async support)
- assert.strict: Strict mode enabled by default
- **NEW**: assert.match(string, regexp): Tests string against regexp
- **NEW**: assert.snapshot(value, snapshot): Snapshot testing support
## Strict Mode Benefits
- Prevents common mistakes with type coercion
- Enforces strict equality checks
- Better error messages for mismatched types
## Enhanced Error Messages (v23.0)
- Color-coded diff output
- Side-by-side comparison for objects
- Detailed stack traces with source maps
- Performance metrics for failed assertions
Version: v23.0
Released: 2024-04
""",
# Software: Bootstrap grid/utility documentation - 2 versions (v5.2, v5.3)
# NOTE(review): the ```html example blocks in both Bootstrap documents contain
# no tags (bare "Column N" text in the grid example, an empty block in the
# v5.3 utilities example) - possibly stripped markup; confirm this is the
# intended dataset content.
"bootstrap_v5.2.txt": """# Bootstrap v5.2 Documentation
## Grid System
Bootstrap includes a powerful mobile-first flexbox grid system for building layouts of all shapes and sizes.
### Grid Classes
The grid system uses a series of containers, rows, and columns to layout and align content.
#### Container Classes
- `.container`: Fixed-width container
- `.container-fluid`: Full-width container
- `.container-{breakpoint}`: Responsive container
#### Row Classes
- `.row`: Creates a grid row
- `.row-cols-*`: Set number of columns
#### Column Classes
- `.col`: Equal-width columns
- `.col-{breakpoint}`: Responsive columns
- `.col-{number}`: Sized columns (1-12)
- `.col-{breakpoint}-{number}`: Responsive sized columns
### Responsive Breakpoints
- xs: <576px
- sm: ≥576px
- md: ≥768px
- lg: ≥992px
- xl: ≥1200px
- xxl: ≥1400px
### Example Usage
```html
Column 1
Column 2
Column 3
```
### Grid Gutters
- `.g-*`: Gutter spacing (0-5)
- `.gx-*`: Horizontal gutters
- `.gy-*`: Vertical gutters
Version: v5.2
Released: 2022-07
""",
"bootstrap_v5.3.txt": """# Bootstrap v5.3 Documentation
## Grid System
Bootstrap includes a powerful mobile-first flexbox grid system for building layouts of all shapes and sizes.
### Grid Classes
The grid system uses a series of containers, rows, and columns to layout and align content.
#### Container Classes
- `.container`: Fixed-width container
- `.container-fluid`: Full-width container
- `.container-{breakpoint}`: Responsive container
#### Row Classes
- `.row`: Creates a grid row
- `.row-cols-*`: Set number of columns
#### Column Classes
- `.col`: Equal-width columns
- `.col-{breakpoint}`: Responsive columns
- `.col-{number}`: Sized columns (1-12)
- `.col-{breakpoint}-{number}`: Responsive sized columns
### Responsive Breakpoints
- xs: <576px
- sm: ≥576px
- md: ≥768px
- lg: ≥992px
- xl: ≥1200px
- xxl: ≥1400px
### Example Usage
```html
Column 1
Column 2
Column 3
```
### Grid Gutters
- `.g-*`: Gutter spacing (0-5)
- `.gx-*`: Horizontal gutters
- `.gy-*`: Vertical gutters
## NEW in v5.3: Utility Classes
### Extended Color Utilities
**ADDITION**: New color utility classes for more granular control:
- `.text-primary-emphasis`
- `.text-secondary-emphasis`
- `.bg-primary-subtle`
- `.bg-secondary-subtle`
- `.border-primary-subtle`
### Extended Spacing Utilities
**ADDITION**: New spacing utilities:
- `.p-*`: Padding (now includes half-step increments)
- `.m-*`: Margin (now includes half-step increments)
- Example: `.p-2-5` for padding of 0.625rem
### Focus Ring Utilities
**NEW FEATURE**: Custom focus ring utilities:
- `.focus-ring`
- `.focus-ring-{color}`
- Provides accessible focus indicators
### Link Utilities
**IMPROVEMENT**: Enhanced link utilities:
- `.link-opacity-*`: Control link opacity (10-100)
- `.link-underline-opacity-*`: Control underline opacity
- Better accessibility for link states
### Example New Utilities
```html
```
Version: v5.3
Released: 2023-05
Changes from v5.2: Added emphasis colors, extended spacing, focus ring utilities, enhanced link controls
""",
# Software: Apache Spark DataFrame API documentation - 2 versions (v3.0, v3.5)
"spark_v3.0.txt": """# Apache Spark v3.0 Documentation
## DataFrame API
DataFrames are distributed collections of data organized into named columns, conceptually equivalent to tables in relational databases.
### Creating DataFrames
```python
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("example").getOrCreate()
# From list
df = spark.createDataFrame([(1, "John"), (2, "Jane")], ["id", "name"])
# From RDD
rdd = spark.sparkContext.parallelize([(1, "John"), (2, "Jane")])
df = spark.createDataFrame(rdd, ["id", "name"])
# From file
df = spark.read.csv("data.csv", header=True, inferSchema=True)
```
### DataFrame Operations
#### Select
```python
df.select("name").show()
df.select(df["name"], df["id"] + 1).show()
```
#### Filter
```python
df.filter(df["id"] > 1).show()
df.where(df["name"] == "John").show()
```
#### GroupBy
```python
df.groupBy("name").count().show()
df.groupBy("department").agg({"salary": "avg"}).show()
```
#### Join
```python
df1.join(df2, df1["id"] == df2["id"], "inner").show()
```
### Schema Definition
```python
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
schema = StructType([
StructField("id", IntegerType(), True),
StructField("name", StringType(), True)
])
df = spark.createDataFrame(data, schema)
```
### Data Types
- IntegerType, LongType, FloatType, DoubleType
- StringType, BinaryType
- BooleanType
- DateType, TimestampType
- ArrayType, MapType, StructType
Version: v3.0
Released: 2020-06
""",
"spark_v3.5.txt": """# Apache Spark v3.5 Documentation
## DataFrame API
DataFrames are distributed collections of data organized into named columns, conceptually equivalent to tables in relational databases.
### Creating DataFrames
```python
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("example").getOrCreate()
# From list
df = spark.createDataFrame([(1, "John"), (2, "Jane")], ["id", "name"])
# From file (improved in v3.5)
df = spark.read.csv("data.csv", header=True, inferSchema=True)
df = spark.read.json("data.json")
df = spark.read.parquet("data.parquet")
```
### DataFrame Operations
#### Select
```python
df.select("name").show()
df.select(df["name"], df["id"] + 1).show()
```
#### Filter
```python
df.filter(df["id"] > 1).show()
df.where(df["name"] == "John").show()
```
#### GroupBy
```python
df.groupBy("name").count().show()
df.groupBy("department").agg({"salary": "avg"}).show()
```
#### Join (Enhanced in v3.5)
```python
# New: Support for multiple join types
df1.join(df2, df1["id"] == df2["id"], "inner").show()
df1.join(df2, "id", "left_outer").show() # Simplified syntax
```
### Schema Definition
```python
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
schema = StructType([
StructField("id", IntegerType(), True),
StructField("name", StringType(), True)
])
df = spark.createDataFrame(data, schema)
```
### Data Types
- IntegerType, LongType, FloatType, DoubleType
- StringType, BinaryType
- BooleanType
- DateType, TimestampType
- ArrayType, MapType, StructType
## REMOVED in v3.5
**DEPRECATED APIs REMOVED**:
- `DataFrame.inferSchema()` - Use `spark.read` with `inferSchema=True` instead
- `SQLContext` - Use `SparkSession` instead
- Legacy `RDD.toDF()` without schema - Now requires explicit schema
- Old Window functions syntax - Use new SQL standard syntax
**Breaking Changes**:
- Python 2 support removed
- Scala 2.11 support removed
- Legacy Hive metastore APIs removed
## NEW in v3.5
**Performance Improvements**:
- Adaptive Query Execution (AQE) enabled by default
- Dynamic partition pruning enhancements
- Better join reordering
**New Features**:
- Built-in ML preprocessing functions
- Enhanced error messages with suggestions
- Better compatibility with Pandas 2.0
Version: v3.5
Released: 2023-09
Major Changes: Removed deprecated APIs, improved performance, Python 2 support dropped
""",
# Healthcare: clinical treatment guidelines - 2 versions (v1.0, v2.0)
"clinical_guidelines_v1.0.txt": """# Clinical Treatment Guidelines v1.0
## Introduction
These guidelines provide evidence-based recommendations for patient care and treatment protocols.
## General Treatment Protocols
### Patient Assessment
1. Initial examination and history taking
2. Vital signs measurement
3. Physical examination
4. Laboratory tests as indicated
5. Diagnostic imaging when necessary
### Medication Administration
- Follow five rights: right patient, right drug, right dose, right route, right time
- Document all medications given
- Monitor for adverse reactions
- Patient education on medication use
### Infection Control
- Standard precautions for all patients
- Hand hygiene before and after patient contact
- Use of personal protective equipment (PPE)
- Proper disposal of medical waste
- Environmental cleaning protocols
### Pain Management
- Assess pain using standardized scales (0-10)
- Non-pharmacological interventions first
- Pharmacological options when indicated
- Regular reassessment and documentation
- Patient-controlled analgesia when appropriate
### Common Conditions
#### Hypertension
- Target BP: <140/90 mmHg
- First-line: ACE inhibitors or thiazide diuretics
- Lifestyle modifications: diet, exercise, stress reduction
- Regular monitoring and follow-up
#### Diabetes Management
- Target HbA1c: <7%
- Blood glucose monitoring
- Insulin or oral hypoglycemics as indicated
- Dietary counseling
- Regular foot examinations
#### Respiratory Infections
- Symptomatic treatment
- Antibiotics only for bacterial infections
- Rest and hydration
- Isolation precautions if necessary
### Documentation Requirements
- All interventions must be documented
- Adverse events reported immediately
- Patient progress notes daily
- Discharge planning initiated early
Version: v1.0
Effective Date: January 2023
""",
"clinical_guidelines_v2.0.txt": """# Clinical Treatment Guidelines v2.0
## Introduction
These guidelines provide evidence-based recommendations for patient care and treatment protocols.
**UPDATED for v2.0**: Incorporates latest research findings and new treatment modalities.
## General Treatment Protocols
### Patient Assessment
1. Initial examination and history taking
2. Vital signs measurement (now includes SpO2 monitoring)
3. Physical examination
4. Laboratory tests as indicated
5. Diagnostic imaging when necessary
6. **NEW**: Risk stratification scoring
### Medication Administration
- Follow five rights: right patient, right drug, right dose, right route, right time
- Document all medications given
- Monitor for adverse reactions
- Patient education on medication use
- **NEW**: Electronic verification system required
- **NEW**: Double-check protocol for high-risk medications
### Infection Control
- Standard precautions for all patients
- Hand hygiene before and after patient contact
- Use of personal protective equipment (PPE)
- Proper disposal of medical waste
- Environmental cleaning protocols
- **NEW**: Enhanced protocols for multi-drug resistant organisms
- **NEW**: Mandatory staff screening during outbreaks
### Pain Management
- Assess pain using standardized scales (0-10)
- Non-pharmacological interventions first
- Pharmacological options when indicated
- Regular reassessment and documentation
- Patient-controlled analgesia when appropriate
- **NEW**: Multimodal analgesia approach preferred
- **NEW**: Reduced opioid prescribing guidelines
### Common Conditions
#### Hypertension (UPDATED)
- **NEW Target BP: <130/80 mmHg** (lowered from 140/90)
- First-line: ACE inhibitors or thiazide diuretics
- **NEW**: Consider combination therapy for BP >140/90
- Lifestyle modifications: diet, exercise, stress reduction
- Regular monitoring and follow-up
- **NEW**: Home blood pressure monitoring encouraged
#### Diabetes Management (UPDATED)
- Target HbA1c: <7% (individualized for elderly: <8%)
- Blood glucose monitoring
- **NEW**: GLP-1 agonists as first-line for cardiovascular benefit
- Insulin or oral hypoglycemics as indicated
- Dietary counseling with registered dietitian
- Regular foot examinations
- **NEW**: Annual retinal screening mandatory
- **NEW**: Cardiovascular risk assessment required
#### Respiratory Infections
- Symptomatic treatment
- Antibiotics only for bacterial infections
- Rest and hydration
- Isolation precautions if necessary
- **NEW**: Rapid PCR testing for influenza and COVID-19
- **NEW**: Updated isolation protocols
### NEW SECTION: Telemedicine Protocols
- Video visit guidelines
- Remote monitoring for chronic conditions
- Digital prescription protocols
- Documentation requirements for virtual care
### Documentation Requirements
- All interventions must be documented in EHR
- Adverse events reported immediately (within 24 hours)
- Patient progress notes daily
- Discharge planning initiated within 24 hours
- **NEW**: Quality metrics tracking required
- **NEW**: Patient satisfaction surveys
Version: v2.0
Effective Date: January 2024
Major Changes: Updated BP targets, new diabetes medications, enhanced infection control, telemedicine added
""",
# Finance: compliance reports - 2 fiscal years (FY2023, FY2024)
"compliance_fy2023.txt": """# Financial Compliance Report FY2023
## Regulatory Overview
This document outlines the compliance requirements for financial reporting and operations for Fiscal Year 2023.
## Key Regulations
### SOX Compliance (Sarbanes-Oxley Act)
- Section 302: CEO/CFO certification of financial statements
- Section 404: Internal control assessment
- Section 409: Real-time disclosure of material changes
- Annual external audit required
- Quarterly internal control testing
### Anti-Money Laundering (AML)
- Customer due diligence (CDD) required
- Transaction monitoring systems operational
- Suspicious Activity Reports (SARs) filed when appropriate
- Employee training completed annually
- Independent testing of AML program
### Know Your Customer (KYC)
- Identity verification for all new customers
- Beneficial ownership identification
- Enhanced due diligence for high-risk customers
- Ongoing monitoring and updates
- Documentation retention for 5 years
### Data Privacy
- GDPR compliance for EU customers
- CCPA compliance for California residents
- Data encryption at rest and in transit
- Access controls and authentication
- Breach notification procedures
## Reporting Requirements
### Financial Statements
- Quarterly 10-Q filings
- Annual 10-K filing
- Earnings releases
- Management Discussion & Analysis (MD&A)
- Audited financial statements
### Regulatory Filings
- Form 13F for institutional investment managers
- Form 4 for insider transactions
- Schedule 13D/G for beneficial ownership
- Form 8-K for material events
### Internal Reports
- Monthly management reports
- Quarterly compliance certifications
- Annual risk assessments
- Internal audit findings
- Board committee reports
## Risk Management
### Operational Risk
- Business continuity planning
- Disaster recovery testing
- Vendor management oversight
- Cybersecurity assessments
- Insurance coverage review
### Market Risk
- Value at Risk (VaR) calculations
- Stress testing scenarios
- Concentration limits
- Hedging strategies
- Daily position monitoring
### Credit Risk
- Credit rating assessments
- Exposure limits by counterparty
- Collateral management
- Provision for loan losses
- Portfolio diversification
## Compliance Metrics FY2023
- Total regulatory filings: 48
- Internal audits conducted: 12
- Compliance training completion: 98%
- Zero material violations
- External audit: Clean opinion
Fiscal Year: 2023
Report Date: December 2023
""",
"compliance_fy2024.txt": """# Financial Compliance Report FY2024
## Regulatory Overview
This document outlines the compliance requirements for financial reporting and operations for Fiscal Year 2024.
**MAJOR UPDATES for FY2024**: New SEC rules, enhanced cybersecurity requirements, and ESG disclosures.
## Key Regulations
### SOX Compliance (Sarbanes-Oxley Act)
- Section 302: CEO/CFO certification of financial statements
- Section 404: Internal control assessment
- Section 409: Real-time disclosure of material changes
- Annual external audit required
- Quarterly internal control testing
- **NEW**: Enhanced documentation requirements
### Anti-Money Laundering (AML)
- Customer due diligence (CDD) required
- Transaction monitoring systems operational
- Suspicious Activity Reports (SARs) filed when appropriate
- Employee training completed annually
- Independent testing of AML program
- **NEW**: Real-time transaction monitoring enhanced
- **NEW**: Cryptocurrency transaction monitoring added
### Know Your Customer (KYC)
- Identity verification for all new customers
- Beneficial ownership identification
- Enhanced due diligence for high-risk customers
- Ongoing monitoring and updates
- Documentation retention for 5 years
- **NEW**: Biometric verification for high-risk accounts
- **NEW**: Automated screening against sanctions lists
### Data Privacy (UPDATED)
- GDPR compliance for EU customers
- CCPA compliance for California residents
- **NEW**: CPRA (California Privacy Rights Act) requirements
- Data encryption at rest and in transit
- Access controls and multi-factor authentication
- Breach notification procedures
- **NEW**: Data mapping and inventory required
- **NEW**: Privacy impact assessments for new systems
### NEW: Cybersecurity Disclosure Rules
- **MAJOR ADDITION**: SEC cybersecurity disclosure requirements
- Material cybersecurity incidents reported within 4 days
- Annual cybersecurity governance disclosure
- Board oversight of cybersecurity risk
- Incident response plan documented and tested
### NEW: ESG Disclosure Requirements
- **MAJOR ADDITION**: Climate-related disclosure rules
- Scope 1 and 2 emissions reporting
- Material climate risks identified
- Board oversight of climate risks
- Third-party assurance of emissions data
## Reporting Requirements
### Financial Statements
- Quarterly 10-Q filings
- Annual 10-K filing
- Earnings releases
- Management Discussion & Analysis (MD&A)
- Audited financial statements
- **NEW**: Inline XBRL tagging required
### Regulatory Filings
- Form 13F for institutional investment managers
- Form 4 for insider transactions
- Schedule 13D/G for beneficial ownership
- Form 8-K for material events
- **NEW**: Form 8-K for cybersecurity incidents
- **NEW**: Climate disclosure forms
### Internal Reports
- Monthly management reports
- Quarterly compliance certifications
- Annual risk assessments
- Internal audit findings
- Board committee reports
- **NEW**: Monthly cybersecurity dashboards
- **NEW**: Quarterly ESG metrics
## Risk Management
### Operational Risk
- Business continuity planning
- Disaster recovery testing (now quarterly)
- Vendor management oversight with annual reviews
- **NEW**: Third-party risk assessment enhanced
- Cybersecurity assessments (now monthly)
- Insurance coverage review
- **NEW**: Ransomware response protocols
### Market Risk
- Value at Risk (VaR) calculations
- Stress testing scenarios (now includes crypto)
- Concentration limits
- Hedging strategies
- Daily position monitoring
- **NEW**: Climate scenario analysis
### Credit Risk
- Credit rating assessments
- Exposure limits by counterparty
- Collateral management
- Provision for loan losses (CECL methodology)
- Portfolio diversification
- **NEW**: ESG factors in credit analysis
### NEW: Cybersecurity Risk
- Penetration testing quarterly
- Vulnerability assessments monthly
- Security awareness training for all employees
- Incident response plan tested annually
- 24/7 security operations center
- Zero-trust architecture implementation
## Compliance Metrics FY2024
- Total regulatory filings: 56 (↑17% from FY2023)
- Internal audits conducted: 16 (↑33%)
- Compliance training completion: 99.5%
- Zero material violations
- External audit: Clean opinion
- **NEW**: Cybersecurity incidents reported: 0
- **NEW**: ESG disclosure score: A-
Fiscal Year: 2024
Report Date: December 2024
Major Changes: New SEC cybersecurity rules, ESG disclosures added, enhanced AML monitoring, CPRA compliance
""",
# Industrial: CNC machine operation manual - 2 revisions (Rev. 1.0, Rev. 2.0)
"machine_operation_rev1.0.txt": """# Industrial Machine Operation Manual - Rev. 1.0
## Equipment Overview
High-precision CNC milling machine for metal fabrication operations.
Model: IMM-5000
Serial Number: [Unit Specific]
Manufacturer: Industrial Machines Inc.
## Safety Requirements
### Personal Protective Equipment (PPE)
- Safety glasses with side shields (ANSI Z87.1)
- Steel-toed safety boots
- Hearing protection (>85 dB areas)
- Machine operator gloves
- No loose clothing or jewelry
### Machine Safety Features
- Emergency stop button (red mushroom head)
- Safety interlocks on all access doors
- Light curtain protection system
- Audible alarm before operation
- Fire suppression system
## Startup Procedure
### Pre-Startup Checks
1. Inspect machine for visible damage or wear
2. Check all safety guards are in place
3. Verify emergency stop functions properly
4. Ensure work area is clean and clear
5. Check coolant levels (minimum 80%)
6. Inspect cutting tools for wear or damage
7. Verify power supply voltage (480V 3-phase)
### Startup Sequence
1. Turn main power switch to ON position
2. Wait for hydraulic system to pressurize (indicator light)
3. Initialize machine control system (press INIT button)
4. Perform axis homing sequence (X, Y, Z axes)
5. Load machining program into controller
6. Verify tool offset data
7. Perform dry run without material
8. Load workpiece and secure in fixture
9. Set spindle speed and feed rate
10. Begin machining operation
## Operation
### Standard Operating Parameters
- Spindle speed range: 100-6000 RPM
- Feed rate: 1-500 inches per minute
- Maximum workpiece weight: 2000 lbs
- Coolant flow rate: 10 GPM
- Operating temperature: 60-90°F
### Control Panel Functions
- CYCLE START: Begins programmed operation
- CYCLE STOP: Pauses operation
- EMERGENCY STOP: Immediate shutdown
- FEED HOLD: Temporarily pauses feed motion
- JOG: Manual axis movement
- SPINDLE OVERRIDE: Adjust spindle speed (50-150%)
### Monitoring During Operation
- Watch for unusual vibrations
- Listen for abnormal sounds
- Monitor coolant flow
- Check chip evacuation
- Verify dimensional accuracy periodically
- Monitor cutting tool wear
## Shutdown Procedure
1. Complete current machining cycle
2. Press CYCLE STOP button
3. Return spindle to home position
4. Stop spindle rotation
5. Turn off coolant system
6. Remove workpiece
7. Clean machine surfaces and work area
8. Shut down control system
9. Turn off main power switch
10. Complete operator log entry
## Maintenance Schedule
### Daily
- Clean machine surfaces
- Check coolant level and condition
- Inspect cutting tools
- Verify all safety features
- Lubricate way surfaces
### Weekly
- Check hydraulic fluid level
- Inspect electrical connections
- Test emergency stop function
- Clean coolant tank filter
### Monthly
- Full machine cleaning
- Lubrication of all grease points
- Check belt tensions
- Calibrate tools
- Inspect safety guards
### Annual
- Professional maintenance service
- Complete electrical inspection
- Hydraulic system service
- Accuracy verification
- Safety system certification
## Troubleshooting
### Machine Won't Start
- Check main power supply
- Verify emergency stop is reset
- Check for blown fuses
- Inspect door interlocks
### Poor Surface Finish
- Check cutting tool condition
- Verify proper speeds and feeds
- Check machine rigidity
- Inspect coolant flow
### Dimensional Inaccuracy
- Verify tool offsets
- Check for thermal growth
- Inspect ball screws
- Verify workpiece fixturing
Revision: 1.0
Date: January 2023
""",
"machine_operation_rev2.0.txt": """# Industrial Machine Operation Manual - Rev. 2.0
## Equipment Overview
High-precision CNC milling machine for metal fabrication operations.
Model: IMM-5000
Serial Number: [Unit Specific]
Manufacturer: Industrial Machines Inc.
**UPDATED Rev. 2.0**: Enhanced safety features, automated monitoring, and improved procedures.
## Safety Requirements
### Personal Protective Equipment (PPE)
- Safety glasses with side shields (ANSI Z87.1)
- Steel-toed safety boots
- Hearing protection (>85 dB areas)
- Machine operator gloves
- No loose clothing or jewelry
- **NEW**: Cut-resistant sleeves for tool changing
### Machine Safety Features
- Emergency stop button (red mushroom head)
- Safety interlocks on all access doors
- Light curtain protection system
- Audible alarm before operation
- Fire suppression system
- **NEW**: Automatic door locking during operation
- **NEW**: Collision detection system
- **NEW**: Automatic power-off on anomaly detection
- **NEW**: Video monitoring system
- **NEW**: Operator presence detection
### NEW: Enhanced Safety Protocols
- **ADDITION**: Two-person operation required for large workpieces
- **ADDITION**: Mandatory safety briefing before first daily use
- **ADDITION**: Personal lockout/tagout procedures
- **ADDITION**: Near-miss reporting system
- **ADDITION**: Monthly safety drills
## Startup Procedure
### Pre-Startup Checks
1. Inspect machine for visible damage or wear
2. Check all safety guards are in place
3. Verify emergency stop functions properly
4. Ensure work area is clean and clear (5S standards)
5. Check coolant levels (minimum 80%)
6. Inspect cutting tools for wear or damage
7. Verify power supply voltage (480V 3-phase)
8. **NEW**: Complete digital pre-start checklist on HMI
9. **NEW**: Verify backup systems operational
10. **NEW**: Check air pressure (90 PSI minimum)
### Startup Sequence
1. Turn main power switch to ON position
2. Wait for hydraulic system to pressurize (indicator light)
3. Initialize machine control system (press INIT button)
4. Perform axis homing sequence (X, Y, Z axes)
5. **NEW**: System automatically runs diagnostics
6. Load machining program into controller
7. Verify tool offset data
8. **NEW**: Automatic tool measurement cycle
9. Perform dry run without material
10. Load workpiece and secure in fixture
11. **NEW**: Scan operator badge for authorization
12. Set spindle speed and feed rate
13. **NEW**: System verifies parameters within safe limits
14. Begin machining operation
## Operation
### Standard Operating Parameters
- Spindle speed range: 100-8000 RPM (↑ from 6000)
- Feed rate: 1-500 inches per minute
- Maximum workpiece weight: 2000 lbs
- Coolant flow rate: 10 GPM
- Operating temperature: 60-90°F
- **NEW**: Automatic parameter optimization based on material
- **NEW**: Real-time monitoring and adjustment
### Control Panel Functions
- CYCLE START: Begins programmed operation
- CYCLE STOP: Pauses operation
- EMERGENCY STOP: Immediate shutdown
- FEED HOLD: Temporarily pauses feed motion
- JOG: Manual axis movement
- SPINDLE OVERRIDE: Adjust spindle speed (50-150%)
- **NEW**: ADAPTIVE CONTROL: Auto-optimizes feeds/speeds
- **NEW**: REMOTE MONITORING: View status on mobile app
### Monitoring During Operation
- Watch for unusual vibrations
- Listen for abnormal sounds
- Monitor coolant flow
- Check chip evacuation
- Verify dimensional accuracy periodically
- Monitor cutting tool wear
- **NEW**: Automated vibration monitoring alerts operator
- **NEW**: Tool wear prediction system
- **NEW**: Automatic quality checks every 10 parts
- **NEW**: Energy consumption tracking
### NEW: Automated Features
- Automatic tool changer with 40-tool capacity
- In-cycle tool measurement
- Adaptive feed control
- Predictive maintenance alerts
- Remote diagnostics capability
- Automatic program backup
- Production counter with yield tracking
## Shutdown Procedure
1. Complete current machining cycle
2. Press CYCLE STOP button
3. **NEW**: Allow automatic cooldown cycle (2 minutes)
4. Return spindle to home position
5. Stop spindle rotation
6. Turn off coolant system
7. **NEW**: System automatically drains coolant from spindle
8. Remove workpiece
9. Clean machine surfaces and work area
10. **NEW**: Complete digital operator log on HMI
11. Shut down control system
12. Turn off main power switch
13. **NEW**: System generates daily production report
## Maintenance Schedule
### Daily
- Clean machine surfaces
- Check coolant level and condition
- Inspect cutting tools
- Verify all safety features
- Lubricate way surfaces
- **NEW**: Review automated diagnostic report
- **NEW**: Check chip conveyor operation
### Weekly
- Check hydraulic fluid level
- Inspect electrical connections
- Test emergency stop function
- Clean coolant tank filter
- **NEW**: Review vibration analysis data
- **NEW**: Update tool life database
### Monthly
- Full machine cleaning
- Lubrication of all grease points
- Check belt tensions
- Calibrate tools
- Inspect safety guards
- **NEW**: Thermal imaging inspection
- **NEW**: Backup all programs and parameters
### Quarterly (NEW)
- Professional calibration service
- Update control software
- Test all safety interlocks
- Inspect for wear on critical components
- Review maintenance logs
### Annual
- Professional maintenance service
- Complete electrical inspection
- Hydraulic system service
- Accuracy verification (laser interferometer)
- Safety system certification
- **NEW**: Complete machine recalibration
- **NEW**: Operator retraining and certification
## Troubleshooting
### Machine Won't Start
- Check main power supply
- Verify emergency stop is reset
- Check for blown fuses
- Inspect door interlocks
- **NEW**: Review diagnostic error codes on HMI
- **NEW**: Check operator authorization
### Poor Surface Finish
- Check cutting tool condition
- Verify proper speeds and feeds
- Check machine rigidity
- Inspect coolant flow
- **NEW**: Review vibration monitoring data
- **NEW**: Check automatic compensation settings
### Dimensional Inaccuracy
- Verify tool offsets
- Check for thermal growth
- Inspect ball screws
- Verify workpiece fixturing
- **NEW**: Run automatic calibration routine
- **NEW**: Check environmental temperature
### NEW: Automated Diagnostics
- System automatically logs errors
- Predictive maintenance alerts
- Remote support connection available
- QR codes for instant technical manual access
- Video troubleshooting guides on HMI
## NEW SECTION: Industry 4.0 Integration
- IoT connectivity for production monitoring
- Integration with MES (Manufacturing Execution System)
- Real-time OEE (Overall Equipment Effectiveness) tracking
- Automatic inventory management of tools and consumables
- Predictive maintenance using machine learning
- Digital twin simulation capability
Revision: 2.0
Date: January 2024
Major Changes: Enhanced safety features (collision detection, presence sensors), automated monitoring, predictive maintenance, Industry 4.0 connectivity, increased spindle speed range
"""
}
# Filename prefix -> category label shown in the summary, listed in the order
# the categories are reported.
_DOMAIN_PREFIXES = (
    ("nodejs_", "Software (Node.js)"),
    ("bootstrap_", "Software (Bootstrap)"),
    ("spark_", "Software (Spark)"),
    ("clinical_guidelines_", "Healthcare"),
    ("compliance_", "Finance"),
    ("machine_operation_", "Industrial"),
)


def create_dataset(dataset_dir="sample_data", docs=None):
    """Write the sample documents to disk and print a summary.

    Args:
        dataset_dir: Directory to write into. It is created (including any
            missing parents) when absent. Defaults to "sample_data", resolved
            against the current working directory.
        docs: Mapping of filename -> document text. Defaults to SAMPLE_DOCS.

    Returns:
        None. Files are written as UTF-8; an existing file with the same name
        is overwritten.

    Raises:
        OSError: If the directory cannot be created or a file cannot be
            written.
    """
    if docs is None:
        docs = SAMPLE_DOCS

    os.makedirs(dataset_dir, exist_ok=True)
    print(f"Creating sample dataset in '{dataset_dir}' directory...")

    # Seed with the known labels so the summary keeps a stable order.
    counts = {label: 0 for _, label in _DOMAIN_PREFIXES}
    for filename, content in docs.items():
        filepath = os.path.join(dataset_dir, filename)
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)
        print(f"✓ Created {filename}")

        # Classify by filename prefix; unknown prefixes are reported as "Other".
        label = next(
            (name for prefix, name in _DOMAIN_PREFIXES if filename.startswith(prefix)),
            "Other",
        )
        counts[label] = counts.get(label, 0) + 1

    print(f"\n✅ Successfully created {len(docs)} sample documents!")
    print("\nDataset distribution:")
    # Report only categories that actually received files, so the summary
    # always matches what was written instead of a hard-coded tally.
    reported = {label: n for label, n in counts.items() if n}
    for label, n in reported.items():
        print(f"- {label}: {n} files")
    print("=" * 50)
    print(f"Total: {len(docs)} documents covering {len(reported)} domains")
# Generate the dataset only when this file is executed as a script, so that
# importing the module (e.g. to reuse SAMPLE_DOCS) writes nothing to disk.
if __name__ == "__main__":
    create_dataset()